[PATCH 1/3] Add PTWRITE builtins for x86

classic Classic list List threaded Threaded
19 messages Options
Reply | Threaded
Open this post in threaded view
|

[PATCH 1/3] Add PTWRITE builtins for x86

Andi Kleen-3
From: Andi Kleen <[hidden email]>

Add builtins/intrinsics for PTWRITE. PTWRITE is a new instruction on Intel Gemini Lake/
Goldmont Plus that allows to write values into the Processor Trace log. This allows
very light weight instrumentation of programs.

The intrinsics are compatible to icc. Automatically enabled for Goldmont Plus.

gcc/:

2018-11-03  Andi Kleen  <[hidden email]>

        * common/config/i386/i386-common.c (OPTION_MASK_ISA_PTWRITE_SET): New.
        (OPTION_MASK_ISA_PTWRITE_UNSET): New.
        (ix86_handle_option): Handle OPT_mptwrite.
        * config/i386/cpuid.h (bit_PTWRITE): Add.
        * config/i386/driver-i386.c (host_detect_local_cpu): Detect ptwrite.
        * config/i386/i386-builtin.def (BDESC): Add ptwrite32/64.
        * config/i386/i386-c.c (ix86_target_macros_internal): Define __PTWRITE__.
        * config/i386/i386.c (ix86_target_string): Handle ptwrite.
        (ix86_option_override_internal): Handle PTA_PTWRITE.
        (ix86_valid_target_attribute_inner_p): Define ptwrite.
        (def_builtin2): Force UINT64 to be 64bit only.
        * config/i386/i386.h (TARGET_PTWRITE): Add.
        (TARGET_PTWRITE_P): Add.
        (PTA_PTWRITE): Add.
        * config/i386/i386.md: Define ptwrite.
        * config/i386/i386.opt: Add -mptwrite.
        * config/i386/immintrin.h (_ptwrite64): Add.
        (_ptwrite32): Add
        * doc/extend.texi: Document __builtin_ia32_ptwrite*.
        * doc/invoke.texi: Document -mptwrite.

gcc/testsuite/ChangeLog:

2018-11-03  Andi Kleen  <[hidden email]>

        * gcc.target/i386/ptwrite1.c: New test.
        * gcc.target/i386/ptwrite2.c: New test.
---
 gcc/common/config/i386/i386-common.c     | 15 ++++++++++++
 gcc/config/i386/cpuid.h                  |  4 ++++
 gcc/config/i386/driver-i386.c            | 12 ++++++++++
 gcc/config/i386/i386-builtin.def         |  4 ++++
 gcc/config/i386/i386-c.c                 |  2 ++
 gcc/config/i386/i386.c                   |  9 ++++++-
 gcc/config/i386/i386.h                   |  5 +++-
 gcc/config/i386/i386.md                  | 10 ++++++++
 gcc/config/i386/i386.opt                 |  4 ++++
 gcc/config/i386/immintrin.h              | 26 ++++++++++++++++++++
 gcc/doc/extend.texi                      |  9 +++++++
 gcc/doc/invoke.texi                      |  7 ++++--
 gcc/testsuite/gcc.target/i386/ptwrite1.c | 30 ++++++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/ptwrite2.c | 14 +++++++++++
 14 files changed, 147 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/ptwrite1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/ptwrite2.c

diff --git a/gcc/common/config/i386/i386-common.c b/gcc/common/config/i386/i386-common.c
index f12806ef3a9..f740995c1e4 100644
--- a/gcc/common/config/i386/i386-common.c
+++ b/gcc/common/config/i386/i386-common.c
@@ -140,6 +140,7 @@ along with GCC; see the file COPYING3.  If not see
 
 #define OPTION_MASK_ISA_FSGSBASE_SET OPTION_MASK_ISA_FSGSBASE
 #define OPTION_MASK_ISA_RDRND_SET OPTION_MASK_ISA_RDRND
+#define OPTION_MASK_ISA_PTWRITE_SET OPTION_MASK_ISA_PTWRITE
 #define OPTION_MASK_ISA_F16C_SET \
   (OPTION_MASK_ISA_F16C | OPTION_MASK_ISA_AVX_SET)
 #define OPTION_MASK_ISA_MWAITX_SET OPTION_MASK_ISA_MWAITX
@@ -267,6 +268,7 @@ along with GCC; see the file COPYING3.  If not see
 
 #define OPTION_MASK_ISA_FSGSBASE_UNSET OPTION_MASK_ISA_FSGSBASE
 #define OPTION_MASK_ISA_RDRND_UNSET OPTION_MASK_ISA_RDRND
+#define OPTION_MASK_ISA_PTWRITE_UNSET OPTION_MASK_ISA_PTWRITE
 #define OPTION_MASK_ISA_F16C_UNSET OPTION_MASK_ISA_F16C
 
 #define OPTION_MASK_ISA_GENERAL_REGS_ONLY_UNSET \
@@ -1125,6 +1127,19 @@ ix86_handle_option (struct gcc_options *opts,
  }
       return true;
 
+    case OPT_mptwrite:
+      if (value)
+ {
+  opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_PTWRITE_SET;
+  opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA_PTWRITE_SET;
+ }
+      else
+ {
+  opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA_PTWRITE_UNSET;
+  opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA_PTWRITE_UNSET;
+ }
+      return true;
+
     case OPT_mf16c:
       if (value)
  {
diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h
index 7e9e2d153dc..2e6d4a55602 100644
--- a/gcc/config/i386/cpuid.h
+++ b/gcc/config/i386/cpuid.h
@@ -126,6 +126,10 @@
 #define bit_XSAVEC (1 << 1)
 #define bit_XSAVES (1 << 3)
 
+/* PT sub leaf (%eax == 14, %ecx == 0) */
+/* %ebx */
+#define bit_PTWRITE (1 << 4)
+
 /* Signatures for different CPU implementations as returned in uses
    of cpuid with level 0.  */
 #define signature_AMD_ebx 0x68747541
diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c
index 8c830bde1dd..423b1c3827f 100644
--- a/gcc/config/i386/driver-i386.c
+++ b/gcc/config/i386/driver-i386.c
@@ -427,6 +427,8 @@ const char *host_detect_local_cpu (int argc, const char **argv)
   unsigned int has_waitpkg = 0;
   unsigned int has_cldemote = 0;
 
+  unsigned int has_ptwrite = 0;
+
   bool arch;
 
   unsigned int l2sizekb = 0;
@@ -542,6 +544,13 @@ const char *host_detect_local_cpu (int argc, const char **argv)
       has_xsaves = eax & bit_XSAVES;
     }
 
+  if (max_level >= 0x14)
+    {
+      __cpuid_count (0x14, 0, eax, ebx, ecx, edx);
+
+      has_ptwrite = ebx & bit_PTWRITE;
+    }
+
   /* Check cpuid level of extended features.  */
   __cpuid (0x80000000, ext_level, ebx, ecx, edx);
 
@@ -1124,6 +1133,8 @@ const char *host_detect_local_cpu (int argc, const char **argv)
       const char *movdir64b = has_movdir64b ? " -mmovdir64b" : " -mno-movdir64b";
       const char *waitpkg = has_waitpkg ? " -mwaitpkg" : " -mno-waitpkg";
       const char *cldemote = has_cldemote ? " -mcldemote" : " -mno-cldemote";
+      const char *ptwrite = has_ptwrite ? " -mptwrite" : " -mno-ptwrite";
+
       options = concat (options, mmx, mmx3dnow, sse, sse2, sse3, ssse3,
  sse4a, cx16, sahf, movbe, aes, sha, pclmul,
  popcnt, abm, lwp, fma, fma4, xop, bmi, sgx, bmi2,
@@ -1137,6 +1148,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
  clwb, mwaitx, clzero, pku, rdpid, gfni, shstk,
  avx512vbmi2, avx512vnni, vaes, vpclmulqdq,
  avx512bitalg, movdiri, movdir64b, waitpkg, cldemote,
+ ptwrite,
  NULL);
     }
 
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index df0f7e975ac..ca3f357228f 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -2879,6 +2879,10 @@ BDESC_FIRST (special_args2, SPECIAL_ARGS2,
  OPTION_MASK_ISA_WBNOINVD, CODE_FOR_wbnoinvd, "__builtin_ia32_wbnoinvd", IX86_BUILTIN_WBNOINVD, UNKNOWN, (int) VOID_FTYPE_VOID)
 BDESC (OPTION_MASK_ISA_MOVDIR64B, CODE_FOR_nothing, "__builtin_ia32_movdir64b", IX86_BUILTIN_MOVDIR64B, UNKNOWN, (int) VOID_FTYPE_PVOID_PCVOID)
 
+/* PTWRITE */
+BDESC (OPTION_MASK_ISA_PTWRITE, CODE_FOR_ptwritesi, "__builtin_ia32_ptwrite32", IX86_BUILTIN_PTWRITE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED)
+BDESC (OPTION_MASK_ISA_PTWRITE, CODE_FOR_ptwritedi, "__builtin_ia32_ptwrite64", IX86_BUILTIN_PTWRITE64, UNKNOWN, (int) VOID_FTYPE_UINT64)
+
 BDESC_END (SPECIAL_ARGS2, MULTI_ARG)
 
 /* FMA4 and XOP.  */
diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c
index 005e1a5b308..4661d00f85d 100644
--- a/gcc/config/i386/i386-c.c
+++ b/gcc/config/i386/i386-c.c
@@ -532,6 +532,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
     def_or_undef (parse_in, "__WAITPKG__");
   if (isa_flag2 & OPTION_MASK_ISA_CLDEMOTE)
     def_or_undef (parse_in, "__CLDEMOTE__");
+  if (isa_flag2 & OPTION_MASK_ISA_PTWRITE)
+    def_or_undef (parse_in, "__PTWRITE__");
   if (TARGET_IAMCU)
     {
       def_or_undef (parse_in, "__iamcu");
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 963c7fcbb34..490bb6292a8 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -2793,7 +2793,8 @@ ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_INT isa2,
     { "-mmwaitx", OPTION_MASK_ISA_MWAITX },
     { "-mmovdir64b", OPTION_MASK_ISA_MOVDIR64B },
     { "-mwaitpkg", OPTION_MASK_ISA_WAITPKG },
-    { "-mcldemote", OPTION_MASK_ISA_CLDEMOTE }
+    { "-mcldemote", OPTION_MASK_ISA_CLDEMOTE },
+    { "-mptwrite", OPTION_MASK_ISA_PTWRITE }
   };
   static struct ix86_target_opts isa_opts[] =
   {
@@ -3875,6 +3876,9 @@ ix86_option_override_internal (bool main_args_p,
  if (((processor_alias_table[i].flags & PTA_WBNOINVD) != 0)
     && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_WBNOINVD))
   opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_WBNOINVD;
+ if (((processor_alias_table[i].flags & PTA_PTWRITE) != 0)
+    && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_PTWRITE))
+  opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_PTWRITE;
 
  if ((processor_alias_table[i].flags
    & (PTA_PREFETCH_SSE | PTA_SSE)) != 0)
@@ -5077,6 +5081,7 @@ ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
     IX86_ATTR_ISA ("movdir64b", OPT_mmovdir64b),
     IX86_ATTR_ISA ("waitpkg", OPT_mwaitpkg),
     IX86_ATTR_ISA ("cldemote", OPT_mcldemote),
+    IX86_ATTR_ISA ("ptwrite",   OPT_mptwrite),
 
     /* enum options */
     IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
@@ -30180,6 +30185,8 @@ def_builtin2 (HOST_WIDE_INT mask, const char *name,
   tree decl = NULL_TREE;
 
   ix86_builtins_isa[(int) code].isa2 = mask;
+  if (tcode == VOID_FTYPE_UINT64)
+    ix86_builtins_isa[(int) code].isa = OPTION_MASK_ISA_64BIT;
 
   if (mask == 0
       || (mask & ix86_isa_flags2) != 0
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 01d49a7263b..39d3e59c8dd 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -191,6 +191,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 #define TARGET_WAITPKG_P(x) TARGET_ISA_WAITPKG_P(x)
 #define TARGET_CLDEMOTE TARGET_ISA_CLDEMOTE
 #define TARGET_CLDEMOTE_P(x) TARGET_ISA_CLDEMOTE_P(x)
+#define TARGET_PTWRITE TARGET_ISA_PTWRITE
+#define TARGET_PTWRITE_P(x) TARGET_ISA_PTWRITE_P(x)
 
 #define TARGET_LP64 TARGET_ABI_64
 #define TARGET_LP64_P(x) TARGET_ABI_64_P(x)
@@ -2354,6 +2356,7 @@ const wide_int_bitmask PTA_RDPID (0, HOST_WIDE_INT_1U << 6);
 const wide_int_bitmask PTA_PCONFIG (0, HOST_WIDE_INT_1U << 7);
 const wide_int_bitmask PTA_WBNOINVD (0, HOST_WIDE_INT_1U << 8);
 const wide_int_bitmask PTA_WAITPKG (0, HOST_WIDE_INT_1U << 9);
+const wide_int_bitmask PTA_PTWRITE (0, HOST_WIDE_INT_1U << 10);
 
 const wide_int_bitmask PTA_CORE2 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2
   | PTA_SSE3 | PTA_SSSE3 | PTA_CX16 | PTA_FXSR;
@@ -2389,7 +2392,7 @@ const wide_int_bitmask PTA_GOLDMONT = PTA_SILVERMONT | PTA_SHA | PTA_XSAVE
   | PTA_RDSEED | PTA_XSAVEC | PTA_XSAVES | PTA_CLFLUSHOPT | PTA_XSAVEOPT
   | PTA_FSGSBASE;
 const wide_int_bitmask PTA_GOLDMONT_PLUS = PTA_GOLDMONT | PTA_RDPID
-  | PTA_SGX;
+  | PTA_SGX | PTA_PTWRITE;
 const wide_int_bitmask PTA_TREMONT = PTA_GOLDMONT_PLUS | PTA_CLWB
   | PTA_GFNI;
 const wide_int_bitmask PTA_KNM = PTA_KNL | PTA_AVX5124VNNIW
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 7fb2b144f47..bdc39456106 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -292,6 +292,8 @@
 
   ;; For Speculation Barrier support
   UNSPECV_SPECULATION_BARRIER
+
+  UNSPECV_PTWRITE
 ])
 
 ;; Constants to represent rounding modes in the ROUND instruction
@@ -19498,6 +19500,14 @@
   [(set_attr "type" "other")
    (set_attr "prefix_extra" "2")])
 
+(define_insn "ptwrite<mode>"
+  [(unspec_volatile [(match_operand:SWI48 0 "register_operand" "r")]
+    UNSPECV_PTWRITE)]
+  "TARGET_PTWRITE"
+  "ptwrite\t%0"
+  [(set_attr "type" "other")
+   (set_attr "prefix_extra" "2")])
+
 (define_insn "rdrand<mode>_1"
   [(set (match_operand:SWI248 0 "register_operand" "=r")
  (unspec_volatile:SWI248 [(const_int 0)] UNSPECV_RDRAND))
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index e7fbf9b6f99..1705815a2ec 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -785,6 +785,10 @@ mwbnoinvd
 Target Report Mask(ISA_WBNOINVD) Var(ix86_isa_flags2) Save
 Support WBNOINVD built-in functions and code generation.
 
+mptwrite
+Target Report Mask(ISA_PTWRITE) Var(ix86_isa_flags2) Save
+Support PTWRITE built-in functions and code generation.
+
 msgx
 Target Report Mask(ISA_SGX) Var(ix86_isa_flags2) Save
 Support SGX built-in functions and code generation.
diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h
index 344e92c745a..b52ab91b4d5 100644
--- a/gcc/config/i386/immintrin.h
+++ b/gcc/config/i386/immintrin.h
@@ -251,4 +251,30 @@ _rdrand64_step (unsigned long long *__P)
 
 #endif /* __x86_64__  */
 
+#ifndef __PTWRITE__
+#pragma GCC push_options
+#pragma GCC target("ptwrite")
+#define __DISABLE_PTWRITE__
+#endif
+
+#ifdef __x86_64__
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_ptwrite64 (unsigned long long __B)
+{
+  __builtin_ia32_ptwrite64 (__B);
+}
+#endif /* __x86_64__ */
+
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_ptwrite32 (unsigned __B)
+{
+  __builtin_ia32_ptwrite32 (__B);
+}
+#ifdef __DISABLE_PTWRITE__
+#undef __DISABLE_PTWRITE__
+#pragma GCC pop_options
+#endif /* __DISABLE_PTWRITE__ */
+
 #endif /* _IMMINTRIN_H_INCLUDED */
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index e2b9ee11a54..1eca009e255 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -21331,6 +21331,15 @@ unsigned int __builtin_ia32_rdrand32_step (unsigned int *)
 unsigned int __builtin_ia32_rdrand64_step (unsigned long long *)
 @end smallexample
 
+The following built-in function is available when @option{-mptwrite} is
+used.  All of them generate the machine instruction that is part of the
+name.
+
+@smallexample
+void __builtin_ia32_ptwrite32 (unsigned)
+void __builtin_ia32_ptwrite64 (unsigned long long)
+@end smallexample
+
 The following built-in functions are available when @option{-msse4a} is used.
 All of them generate the machine instruction that is part of the name.
 
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index e290128f535..cb5bc7bafc5 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -1281,7 +1281,7 @@ See RS/6000 and PowerPC Options.
 -mmmx  -msse  -msse2  -msse3  -mssse3  -msse4.1  -msse4.2  -msse4  -mavx @gol
 -mavx2  -mavx512f  -mavx512pf  -mavx512er  -mavx512cd  -mavx512vl @gol
 -mavx512bw  -mavx512dq  -mavx512ifma  -mavx512vbmi  -msha  -maes @gol
--mpclmul  -mfsgsbase  -mrdrnd  -mf16c  -mfma -mpconfig -mwbnoinvd @gol
+-mpclmul  -mfsgsbase  -mrdrnd  -mf16c  -mfma -mpconfig -mwbnoinvd -mptwrite @gol
 -mprefetchwt1  -mclflushopt  -mxsavec  -mxsaves @gol
 -msse4a  -m3dnow  -m3dnowa  -mpopcnt  -mabm  -mbmi  -mtbm  -mfma4  -mxop @gol
 -mlzcnt  -mbmi2  -mfxsr  -mxsave  -mxsaveopt  -mrtm  -mlwp @gol
@@ -27815,6 +27815,9 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
 @itemx -mfsgsbase
 @opindex mfsgsbase
 @need 200
+@itemx -mptwrite
+@opindex mptwrite
+@need 200
 @itemx -mrdrnd
 @opindex mrdrnd
 @need 200
@@ -27923,7 +27926,7 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
 @opindex mcldemote
 These switches enable the use of instructions in the MMX, SSE,
 SSE2, SSE3, SSSE3, SSE4.1, AVX, AVX2, AVX512F, AVX512PF, AVX512ER, AVX512CD,
-SHA, AES, PCLMUL, FSGSBASE, RDRND, F16C, FMA, SSE4A, FMA4, XOP, LWP, ABM,
+SHA, AES, PCLMUL, FSGSBASE, PTWRITE, RDRND, F16C, FMA, SSE4A, FMA4, XOP, LWP, ABM,
 AVX512VL, AVX512BW, AVX512DQ, AVX512IFMA, AVX512VBMI, BMI, BMI2, VAES, WAITPKG,
 FXSR, XSAVE, XSAVEOPT, LZCNT, RTM, MWAITX, PKU, IBT, SHSTK, AVX512VBMI2,
 GFNI, VPCLMULQDQ, AVX512BITALG, MOVDIRI, MOVDIR64B,
diff --git a/gcc/testsuite/gcc.target/i386/ptwrite1.c b/gcc/testsuite/gcc.target/i386/ptwrite1.c
new file mode 100644
index 00000000000..e09028ed428
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/ptwrite1.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mptwrite" } */
+/* { dg-final { scan-assembler "ptwrite" } } */
+
+void ptwrite1(int a)
+{
+  __builtin_ia32_ptwrite32 (a);
+}
+
+#ifdef __x86_64__
+void ptwrite2(unsigned long b)
+{
+  __builtin_ia32_ptwrite64 (b);
+}
+
+void ptwrite3(unsigned char b)
+{
+  __builtin_ia32_ptwrite64 (b);
+}
+
+void ptwrite4(unsigned short b)
+{
+  __builtin_ia32_ptwrite64 (b);
+}
+#endif
+
+void ptwrite5(unsigned short b)
+{
+  __builtin_ia32_ptwrite32 (b);
+}
diff --git a/gcc/testsuite/gcc.target/i386/ptwrite2.c b/gcc/testsuite/gcc.target/i386/ptwrite2.c
new file mode 100644
index 00000000000..299c6511ef4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/ptwrite2.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mptwrite " } */
+/* { dg-final { scan-assembler "ptwrite.*r" } } */
+/* { dg-final { scan-assembler "ptwrite.*e" } } */
+
+#include <x86intrin.h>
+
+void ptwrite1(void)
+{
+  _ptwrite32 (1);
+#ifdef __x86_64__
+  _ptwrite64 (2);
+#endif
+}
--
2.19.1

Reply | Threaded
Open this post in threaded view
|

[PATCH 2/3] Add a pass to automatically add ptwrite instrumentation

Andi Kleen-3
From: Andi Kleen <[hidden email]>

Add a new pass to automatically instrument changes to variables
with the new PTWRITE instruction on x86. PTWRITE writes a 4 or 8 byte
field into an Processor Trace log, which allows log over head
logging of informatin.

This allows to reconstruct how values later, which can be useful for
debugging or other analysis of the program behavior. With the compiler
support this can be done with without having to manually add instrumentation
to the code.

Using dwarf information this can be later mapped back to the variables.

There are new options to enable instrumentation for different types,
and also a new attribute to control analysis fine grained per
function or variable level. The attributes can be set on both
the variable and the type level, and also on structure fields.
This allows to enable tracing only for specific code in large
programs.

The pass is generic, but only the x86 backend enables the necessary
hooks. When the backend enables the necessary hooks (with -mptwrite)
there is an additional pass that looks through the code for
attribute vartrace enabled functions or variables.

The -fvartrace-locals options is experimental: it works, but it
generates redundant ptwrites because the pass doesn't use
the SSA information to minimize instrumentation. This could be optimized
later.

Currently the code can be tested with SDE, or on a Intel
Gemini Lake system with a new enough Linux kernel (v4.10+)
that supports PTWRITE for PT. Linux perf can be used to
record the values

perf record -e intel_pt/ptw=1,branch=0/ program
perf script --itrace=crw -F +synth ...

I have an experimential version of perf that can also use
dwarf information to symbolize many[1] values back to their variable
names. So far it is not in standard perf, but available at

https://git.kernel.org/pub/scm/linux/kernel/git/ak/linux-misc.git/log/?h=perf/var-resolve-4

It is currently not able to decode all variable locations to names,
but a large subset.

Longer term hopefully gdb will support this information too.

The CPU can potentially generate very data high bandwidths when
code doing a lot of computation is heavily instrumented.
This can cause some data loss in both the CPU and also in perf
logging the data when the disk cannot keep up.

Running some larger workloads most workloads do not cause
CPU level overflows, but I've seen it with -fvartrace
with crafty, and with more workloads with -fvartrace-locals.

Recommendation is to not fully instrument programs,
but only areas of interest either at the file level or using
the attributes.

The other thing is that perf and the disk often cannot keep up
with the data bandwidth for longer computations. In this case
it's possible to use perf snapshot mode (add --snapshot
to the command line above). The data will be only logged to
a memory ring buffer then, and only dump the buffers on events
of interest by sending SIGUSR2 to the perf binrary.

In the future this will be hopefully better supported with
core files and gdb.

Passes bootstrap and test suite on x86_64-linux, also
bootstrapped and tested gcc itself with full -fvartrace
and -fvartrace-locals instrumentation.

gcc/:

2018-11-03  Andi Kleen  <[hidden email]>

        * Makefile.in: Add tree-vartrace.o.
        * common.opt: Add -fvartrace, -fvartrace-returns,
        -fvartrace-args, -fvartrace-reads, -fvartrace-writes,
        -fvartrace-locals
        * config/i386/i386.c (ix86_vartrace_func): Add.
        (TARGET_VARTRACE_FUNC): Add.
        * doc/extend.texi: Document vartrace/no_vartrace
        attributes.
        * doc/invoke.texi: Document -fvartrace, -fvartrace-returns,
        -fvartrace-args, -fvartrace-reads, -fvartrace-writes,
        -fvartrace-locals
        * doc/tm.texi (TARGET_VARTRACE_FUNC): Add.
        * passes.def: Add vartrace pass.
        * target.def (vartrace_func): Add.
        * tree-pass.h (make_pass_vartrace): Add.
        * tree-vartrace.c: New file to implement vartrace pass.

gcc/c-family/:

2018-11-03  Andi Kleen  <[hidden email]>

        * c-attribs.c (handle_vartrace_attribute): New function.

config/:

2018-11-03  Andi Kleen  <[hidden email]>

        * bootstrap-vartrace.mk: New.
        * bootstrap-vartrace-locals.mk: New.
---
 config/bootstrap-vartrace-locals.mk |   3 +
 config/bootstrap-vartrace.mk        |   3 +
 gcc/Makefile.in                     |   1 +
 gcc/c-family/c-attribs.c            |  23 ++
 gcc/common.opt                      |  24 ++
 gcc/config/i386/i386.c              |  16 +
 gcc/doc/extend.texi                 |  13 +
 gcc/doc/invoke.texi                 |  29 ++
 gcc/doc/tm.texi                     |   4 +
 gcc/doc/tm.texi.in                  |   2 +
 gcc/passes.def                      |   1 +
 gcc/target.def                      |   7 +
 gcc/tree-pass.h                     |   1 +
 gcc/tree-vartrace.c                 | 463 ++++++++++++++++++++++++++++
 14 files changed, 590 insertions(+)
 create mode 100644 config/bootstrap-vartrace-locals.mk
 create mode 100644 config/bootstrap-vartrace.mk
 create mode 100644 gcc/tree-vartrace.c

diff --git a/config/bootstrap-vartrace-locals.mk b/config/bootstrap-vartrace-locals.mk
new file mode 100644
index 00000000000..c6c79e21120
--- /dev/null
+++ b/config/bootstrap-vartrace-locals.mk
@@ -0,0 +1,3 @@
+STAGE2_CFLAGS += -mptwrite -fvartrace -fvartrace-locals
+STAGE3_CFLAGS += -mptwrite -fvartrace -fvartrace-locals
+STAGE4_CFLAGS += -mptwrite -fvartrace -fvartrace-locals
diff --git a/config/bootstrap-vartrace.mk b/config/bootstrap-vartrace.mk
new file mode 100644
index 00000000000..e29824d799b
--- /dev/null
+++ b/config/bootstrap-vartrace.mk
@@ -0,0 +1,3 @@
+STAGE2_CFLAGS += -mptwrite -fvartrace
+STAGE3_CFLAGS += -mptwrite -fvartrace
+STAGE4_CFLAGS += -mptwrite -fvartrace
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 719a516c356..46aa4800e57 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1594,6 +1594,7 @@ OBJS = \
  tree-vectorizer.o \
  tree-vector-builder.o \
  tree-vrp.o \
+ tree-vartrace.o \
  tree.o \
  typed-splay-tree.o \
  unique-ptr-tests.o \
diff --git a/gcc/c-family/c-attribs.c b/gcc/c-family/c-attribs.c
index 4416b5042f7..66bbd87921f 100644
--- a/gcc/c-family/c-attribs.c
+++ b/gcc/c-family/c-attribs.c
@@ -104,6 +104,8 @@ static tree handle_tls_model_attribute (tree *, tree, tree, int,
  bool *);
 static tree handle_no_instrument_function_attribute (tree *, tree,
      tree, int, bool *);
+static tree handle_vartrace_attribute (tree *, tree,
+     tree, int, bool *);
 static tree handle_no_profile_instrument_function_attribute (tree *, tree,
      tree, int, bool *);
 static tree handle_malloc_attribute (tree *, tree, tree, int, bool *);
@@ -325,6 +327,12 @@ const struct attribute_spec c_common_attribute_table[] =
   { "no_instrument_function", 0, 0, true,  false, false, false,
       handle_no_instrument_function_attribute,
       NULL },
+  { "vartrace",      0, 0, false,  false, false, false,
+      handle_vartrace_attribute,
+      NULL },
+  { "no_vartrace",      0, 0, false,  false, false, false,
+      handle_vartrace_attribute,
+      NULL },
   { "no_profile_instrument_function",  0, 0, true, false, false, false,
       handle_no_profile_instrument_function_attribute,
       NULL },
@@ -767,6 +775,21 @@ handle_no_sanitize_undefined_attribute (tree *node, tree name, tree, int,
   return NULL_TREE;
 }
 
+/* Handle "vartrace"/"no_vartrace" attributes; arguments as in
+   struct attribute_spec.handler.  */
+
+static tree
+handle_vartrace_attribute (tree *node, tree, tree, int flags,
+   bool *)
+{
+  if (TYPE_P (*node) && !(flags & (int) ATTR_FLAG_TYPE_IN_PLACE))
+    *node = build_variant_type_copy (*node);
+
+  /* Can apply to types, functions, variables.  */
+  /* We lookup it up later with lookup_attribute.  */
+  return NULL_TREE;
+}
+
 /* Handle an "asan odr indicator" attribute; arguments as in
    struct attribute_spec.handler.  */
 
diff --git a/gcc/common.opt b/gcc/common.opt
index 2971dc21b1f..930acf40588 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -2811,6 +2811,30 @@ ftree-scev-cprop
 Common Report Var(flag_tree_scev_cprop) Init(1) Optimization
 Enable copy propagation of scalar-evolution information.
 
+fvartrace
+Common Report Var(flag_vartrace)
+Generate all variable tracking instrumentations, except for locals.
+
+fvartrace-returns
+Common Report Var(flag_vartrace_returns)
+Generate variable tracking instructions for function returns.
+
+fvartrace-args
+Common Report Var(flag_vartrace_args)
+Generate variable tracking instructions for function arguments.
+
+fvartrace-reads
+Common Report Var(flag_vartrace_reads)
+Generate variable tracking instructions for reads.
+
+fvartrace-writes
+Common Report Var(flag_vartrace_writes)
+Generate variable tracking instructions for writes.
+
+fvartrace-locals
+Common Report Var(flag_vartrace_locals)
+Generate variable tracking instructions for locals.
+
 ; -fverbose-asm causes extra commentary information to be produced in
 ; the generated assembly code (to make it more readable).  This option
 ; is generally only of use to those who actually need to read the
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 490bb6292a8..4337121c239 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -31873,6 +31873,19 @@ ix86_mangle_function_version_assembler_name (tree decl, tree id)
 }
 
 
+static tree
+ix86_vartrace_func (tree type)
+{
+  if (!(ix86_isa_flags2 & OPTION_MASK_ISA_PTWRITE))
+    return NULL;
+  if (TYPE_PRECISION (type) == 32)
+    return ix86_builtins [(int) IX86_BUILTIN_PTWRITE32];
+  else if (TYPE_PRECISION (type) == 64)
+    return ix86_builtins [(int) IX86_BUILTIN_PTWRITE64];
+  else
+    return NULL;
+}
+
 static tree
 ix86_mangle_decl_assembler_name (tree decl, tree id)
 {
@@ -50849,6 +50862,9 @@ ix86_run_selftests (void)
 #undef TARGET_ASAN_SHADOW_OFFSET
 #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
 
+#undef TARGET_VARTRACE_FUNC
+#define TARGET_VARTRACE_FUNC ix86_vartrace_func
+
 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
 
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 1eca009e255..08286aa4591 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -3193,6 +3193,13 @@ the standard C library can be guaranteed not to throw an exception
 with the notable exceptions of @code{qsort} and @code{bsearch} that
 take function pointer arguments.
 
+@item no_vartrace
+@cindex @code{no_vartrace} function or variable attribute
+Disable data tracing for the function or variable or structured field
+marked with this attribute. Applies to types. Currently implemented
+for x86 when the @option{ptwrite} target option is enabled for systems
+that support the @code{PTWRITE} instruction.
+
 @item optimize (@var{level}, @dots{})
 @item optimize (@var{string}, @dots{})
 @cindex @code{optimize} function attribute
@@ -3454,6 +3461,12 @@ When applied to a member function of a C++ class template, the
 attribute also means that the function is instantiated if the
 class itself is instantiated.
 
+@item vartrace
+@cindex @code{vartrace} function or variable attribute
+Enable data tracing for the function or variable or structure field
+marked with this attribute. Applies to types. Will not trace locals,
+but arguments, returns, globals, pointer references.
+
 @item visibility ("@var{visibility_type}")
 @cindex @code{visibility} function attribute
 This attribute affects the linkage of the declaration to which it is attached.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index cb5bc7bafc5..2f10b3c1023 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -2720,6 +2720,35 @@ Don't use the @code{__cxa_get_exception_ptr} runtime routine.  This
 causes @code{std::uncaught_exception} to be incorrect, but is necessary
 if the runtime routine is not available.
 
+@item -fvartrace
+@opindex -fvartrace
+Insert trace instructions to trace variable values at runtime.
+Requires enabling a backend specific option, like @option{-mptwrite} to enable
+@code{PTWRITE} instruction generation on x86. @option{-fvartrace} traces
+arguments, return values, pointer references and globals, but no locals.
+
+@item -fvartrace-args
+@opindex -fvartrace-args
+Trace arguments. Can be used independently or together with @option{-vartrace},
+or as @option{-fno-vartrace-args} to disable.
+
+@item -fvartrace-returns
+@opindex -fvartrace-returns
+Trace return values.  Can be used independently or together with @option{-vartrace},
+or as @option{-fno-vartrace-return} to disable.
+
+@item -fvartrace-reads
+@opindex -fvartrace-reads
+Trace reads.
+
+@item -fvartrace-writes
+@opindex -fvartrace-writes
+Trace writes.
+
+@item -fvartrace-locals
+@opindex -fvartrace-locals
+Insert code to trace local variables. This can have high overhead.
+
 @item -fvisibility-inlines-hidden
 @opindex fvisibility-inlines-hidden
 This switch declares that the user does not attempt to compare
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index f841527f971..6555cb122e9 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -11933,6 +11933,10 @@ Address Sanitizer shadow memory address.  NULL if Address Sanitizer is not
 supported by the target.
 @end deftypefn
 
+@deftypefn {Target Hook} tree TARGET_VARTRACE_FUNC (tree @var{type})
+Return a builtin to call to trace variables or NULL if not supported by the target.
+@end deftypefn
+
 @deftypefn {Target Hook} {unsigned HOST_WIDE_INT} TARGET_MEMMODEL_CHECK (unsigned HOST_WIDE_INT @var{val})
 Validate target specific memory model mask bits. When NULL no target specific
 memory model bits are allowed.
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 967ef3ad22f..7cce21bb26c 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -8101,6 +8101,8 @@ and the associated definitions of those functions.
 
 @hook TARGET_ASAN_SHADOW_OFFSET
 
+@hook TARGET_VARTRACE_FUNC
+
 @hook TARGET_MEMMODEL_CHECK
 
 @hook TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
diff --git a/gcc/passes.def b/gcc/passes.def
index 24f212c8e31..518cb4ef6f7 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -179,6 +179,7 @@ along with GCC; see the file COPYING3.  If not see
   NEXT_PASS (pass_oacc_device_lower);
   NEXT_PASS (pass_omp_device_lower);
   NEXT_PASS (pass_omp_target_link);
+  NEXT_PASS (pass_vartrace);
   NEXT_PASS (pass_all_optimizations);
   PUSH_INSERT_PASSES_WITHIN (pass_all_optimizations)
       NEXT_PASS (pass_remove_cgraph_callee_edges);
diff --git a/gcc/target.def b/gcc/target.def
index ad27d352ca4..db5d88efb95 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -4300,6 +4300,13 @@ supported by the target.",
  unsigned HOST_WIDE_INT, (void),
  NULL)
 
+/* Defines the builtin to trace variables, or NULL.  */
+DEFHOOK
+(vartrace_func,
+ "Return a builtin to call to trace variables or NULL if not supported by the target.",
+ tree, (tree type),
+ NULL)
+
 /* Functions relating to calls - argument passing, returns, etc.  */
 /* Members of struct call have no special macro prefix.  */
 HOOK_VECTOR (TARGET_CALLS, calls)
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index af15adc8e0c..2cf31785a6f 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -423,6 +423,7 @@ extern gimple_opt_pass *make_pass_strlen (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_fold_builtins (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_post_ipa_warn (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_stdarg (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_vartrace (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_early_warn_uninitialized (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_late_warn_uninitialized (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_cse_reciprocals (gcc::context *ctxt);
diff --git a/gcc/tree-vartrace.c b/gcc/tree-vartrace.c
new file mode 100644
index 00000000000..07f5aa6bc8f
--- /dev/null
+++ b/gcc/tree-vartrace.c
@@ -0,0 +1,463 @@
+/* Insert instructions for data value tracing.
+   Copyright (C) 2017 Free Software Foundation, Inc.
+   Contributed by Andi Kleen.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "target.h"
+#include "tree.h"
+#include "tree-iterator.h"
+#include "tree-pass.h"
+#include "basic-block.h"
+#include "gimple.h"
+#include "gimple-iterator.h"
+#include "gimplify.h"
+#include "gimplify-me.h"
+#include "gimple-ssa.h"
+#include "gimple-pretty-print.h"
+#include "cfghooks.h"
+#include "ssa.h"
+#include "tree-dfa.h"
+#include "attribs.h"
+
+enum attrstate { force_off, force_on, neutral };
+
+/* Can we trace with attributes ATTR.  */
+
+static attrstate supported_attr (tree attr)
+{
+  if (lookup_attribute ("no_vartrace", attr))
+    return force_off;
+  if (lookup_attribute ("vartrace", attr))
+    return force_on;
+  return neutral;
+}
+
+/* Is ARG supported considering S, handling both decls and other trees.  */
+
+static attrstate supported_op (tree arg, attrstate s)
+{
+  if (s != neutral)
+    return s;
+  if (DECL_P (arg))
+    {
+      s = supported_attr (DECL_ATTRIBUTES (arg));
+      if (s != neutral)
+ return s;
+    }
+  return supported_attr (TYPE_ATTRIBUTES (TREE_TYPE (arg)));
+}
+
+/* Can we trace T.  */
+
+static attrstate supported_type (tree t)
+{
+  tree type = TREE_TYPE (t);
+
+  if (!POINTER_TYPE_P (type) && !INTEGRAL_TYPE_P (type))
+    return force_off;
+  enum attrstate s = supported_op (t, neutral);
+  if (TREE_CODE (t) == COMPONENT_REF
+   || TREE_CODE (t) == ARRAY_REF)
+    {
+      s = supported_op (TREE_OPERAND (t, 0), s);
+      s = supported_op (TREE_OPERAND (t, 1), s);
+    }
+  return s;
+}
+
+/* Can we trace T, or if FORCE is set.  */
+
+static bool supported_type_or_force (tree t, bool force)
+{
+  enum attrstate s = supported_type (t);
+  if (s == neutral)
+    return force;
+  return s == force_off ? false : true;
+}
+
+/* Return true if T refering to a local variable.
+   ?? better ways to do this?  */
+
+static bool is_local (tree t)
+{
+  // Add another attribute to override?
+  if (!flag_vartrace_locals)
+    return false;
+  if (TREE_STATIC (t))
+    return false;
+  if (TREE_CODE (t) == VAR_DECL && DECL_EXTERNAL (t))
+    return false;
+  return true;
+}
+
+/* Is T something we can log, FORCEing the type if needed.  */
+
+static bool supported_mem (tree t, bool force)
+{
+  enum attrstate s = supported_type (t);
+
+  if (s == force_off)
+    return false;
+
+  switch (TREE_CODE (t))
+    {
+    case VAR_DECL:
+      if (DECL_ARTIFICIAL (t))
+ return false;
+      if (is_local (t))
+ return true;
+      return s == force_on || force;
+
+    case ARRAY_REF:
+    case COMPONENT_REF:
+      t = TREE_OPERAND (t, 0);
+      if (is_local (t))
+ return true;
+      return s == force_on || force;
+
+    case TARGET_MEM_REF:
+    case MEM_REF:
+      // could use points-to to check for locals?
+      return true;
+
+    case SSA_NAME:
+      if (flag_vartrace_locals && is_gimple_reg (t))
+ return true;
+      break;
+
+    default:
+      break;
+    }
+
+  return false;
+}
+
+/* Print debugging for inserting CALL at ORIG_STMT with type of VAL.  */
+
+static void log_trace_code (gimple *orig_stmt, gimple *code,
+    tree val)
+{
+  if (dump_file)
+    {
+      if (orig_stmt)
+ fprintf (dump_file, "BB%d ", gimple_bb (orig_stmt)->index);
+      fprintf (dump_file, "inserting ");
+      print_gimple_stmt (dump_file, code, 0, TDF_VOPS|TDF_MEMSYMS);
+      if (orig_stmt)
+ {
+  fprintf (dump_file, "orig ");
+  print_gimple_stmt (dump_file, orig_stmt, 2,
+     TDF_VOPS|TDF_MEMSYMS);
+ }
+      fprintf (dump_file, "type ");
+      print_generic_expr (dump_file, TREE_TYPE (val), TDF_SLIM);
+      fputc ('\n', dump_file);
+      fputc ('\n', dump_file);
+    }
+}
+
+/* Insert variable tracing code for VAL before iterator GI, originally
+   for ORIG_STMT.  Return trace variable or NULL.  */
+
+static tree insert_trace (gimple_stmt_iterator *gi, tree val,
+  gimple *orig_stmt)
+{
+  tree func = targetm.vartrace_func (TREE_TYPE (val));
+  if (!func)
+    return NULL_TREE;
+
+  location_t loc = gimple_location (orig_stmt);
+
+  gimple_seq seq = NULL;
+  tree tvar = make_ssa_name (TREE_TYPE (val));
+  gassign *assign = gimple_build_assign (tvar, val);
+  log_trace_code (orig_stmt, assign, val);
+  gimple_set_location (assign, loc);
+  gimple_seq_add_stmt (&seq, assign);
+
+  gcall *call = gimple_build_call (func, 1, tvar);
+  log_trace_code (NULL, call, tvar);
+  gimple_set_location (call, loc);
+  gimple_seq_add_stmt (&seq, call);
+
+  gsi_insert_seq_before (gi, seq, GSI_SAME_STMT);
+  return tvar;
+}
+
+/* Insert trace at GI for T in FUN if suitable memory or variable reference.
+   Always if FORCE. Originally on ORIG_STMT.  */
+
+tree instrument_mem (gimple_stmt_iterator *gi, tree t,
+     bool force,
+     gimple *orig_stmt)
+{
+  if (supported_mem (t, force))
+    return insert_trace (gi, t, orig_stmt);
+  return NULL_TREE;
+}
+
+/* Instrument arguments for FUN considering FORCE. Return true if
+   function has changed.  */
+
+bool instrument_args (function *fun, bool force)
+{
+  bool changed = false;
+  gimple_stmt_iterator gi;
+
+  /* Local tracing usually takes care of the argument too, when
+     they are read. This avoids redundant trace instructions.  */
+  if (flag_vartrace_locals)
+    return false;
+
+  for (tree arg = DECL_ARGUMENTS (current_function_decl);
+       arg != NULL_TREE;
+       arg = DECL_CHAIN (arg))
+    {
+     gi = gsi_start_bb (BASIC_BLOCK_FOR_FN (fun, NUM_FIXED_BLOCKS));
+     if (supported_type_or_force (arg, force || flag_vartrace_args))
+ {
+  tree func = targetm.vartrace_func (TREE_TYPE (arg));
+  if (!func)
+    continue;
+
+  tree sarg = NULL;
+  // ??? or force like sanopt?
+  if (is_gimple_reg (arg))
+    sarg = get_or_create_ssa_default_def (fun, arg);
+  if (!sarg)
+    continue;
+
+  if (has_zero_uses (sarg))
+    continue;
+
+  gimple_seq seq = NULL;
+  tree tvar = make_ssa_name (TREE_TYPE (sarg));
+  gassign *assign = gimple_build_assign (tvar, sarg);
+  gimple_set_location (assign, fun->function_start_locus);
+  gimple_seq_add_stmt (&seq, assign);
+
+  gcall *call = gimple_build_call (func, 1, tvar);
+  log_trace_code (NULL, call, tvar);
+  gimple_set_location (call, fun->function_start_locus);
+  gimple_seq_add_stmt (&seq, call);
+
+  edge edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (fun));
+  gsi_insert_seq_on_edge_immediate (edge, seq);
+
+  changed = true;
+ }
+    }
+  return changed;
+}
+
+/* Generate trace call for store STMT at GI, force if FORCE.  Return true
+   if successfull. Modifies the original store to use a temporary.  */
+
+static bool instrument_store (gimple_stmt_iterator *gi, gimple *stmt, bool force)
+{
+  if (!supported_mem (gimple_assign_lhs (stmt), force))
+    return false;
+
+  tree orig_tgt = gimple_assign_lhs (stmt);
+
+  tree func = targetm.vartrace_func (TREE_TYPE (orig_tgt));
+  if (!func)
+    return false;
+
+  tree new_tgt = make_ssa_name(TREE_TYPE (orig_tgt));
+  gimple_assign_set_lhs (stmt, new_tgt);
+  update_stmt (stmt);
+  log_trace_code (NULL, stmt, new_tgt);
+
+  gcall *tcall = gimple_build_call (func, 1, new_tgt);
+  log_trace_code (stmt, tcall, new_tgt);
+  gimple_set_location (tcall, gimple_location (stmt));
+  gsi_insert_after (gi, tcall, GSI_CONTINUE_LINKING);
+
+  gassign *new_store = gimple_build_assign (orig_tgt, new_tgt);
+  gimple_set_location (new_store, gimple_location (stmt));
+  log_trace_code (NULL, new_store, new_tgt);
+  gsi_insert_after (gi, new_store, GSI_CONTINUE_LINKING);
+  return true;
+}
+
+/* Instrument STMT at GI. Force if FORCE. CHANGED is the previous changed
+   state, which is also returned.  */
+
+bool instrument_assign (gimple_stmt_iterator *gi,
+ gimple *stmt, bool changed, bool force)
+{
+  gassign *gas = as_a <gassign *> (stmt);
+  bool read_force = force || flag_vartrace_reads;
+  tree t;
+
+  t = instrument_mem (gi, gimple_assign_rhs1 (gas),
+      read_force,
+      stmt);
+  if (t)
+    {
+      gimple_assign_set_rhs1 (gas, t);
+      changed = true;
+    }
+  if (gimple_num_ops (gas) > 2)
+    {
+      t = instrument_mem (gi, gimple_assign_rhs2 (gas),
+  read_force,
+  stmt);
+      if (t)
+ {
+  gimple_assign_set_rhs2 (gas, t);
+  changed = true;
+ }
+    }
+  if (gimple_num_ops (gas) > 3)
+    {
+      t = instrument_mem (gi, gimple_assign_rhs3 (gas),
+  read_force,
+  stmt);
+      if (t)
+ {
+  gimple_assign_set_rhs3 (gas, t);
+  changed = true;
+ }
+      }
+  if (gimple_num_ops (gas) > 4)
+    gcc_unreachable ();
+  if (gimple_store_p (stmt))
+    changed |= instrument_store (gi, stmt, flag_vartrace_writes || force);
+  if (changed)
+    update_stmt (stmt);
+  return changed;
+}
+
+/* Instrument return in function FUN at statement STMT at GI, force if
+   FORCE.  CHANGED is the changed flag, which is also returned.  */
+
+static bool instrument_return (function *fun,
+       gimple_stmt_iterator *gi,
+       gimple *stmt, bool changed,
+       bool force)
+{
+  tree restype = TREE_TYPE (TREE_TYPE (fun->decl));
+  greturn *gret = as_a <greturn *> (stmt);
+  tree rval = gimple_return_retval (gret);
+
+  /* Cannot handle complex C++ return values at this point, even
+     if they would collapse to a valid trace type.  */
+  if (rval
+      && useless_type_conversion_p (restype, TREE_TYPE (rval))
+      && supported_type_or_force (rval, flag_vartrace_returns || force))
+    {
+      if (tree tvar = insert_trace (gi, rval, stmt))
+ {
+  changed = true;
+  gimple_return_set_retval (gret, tvar);
+  log_trace_code (NULL, gret, tvar);
+  update_stmt (stmt);
+ }
+    }
+
+  return changed;
+}
+
+/* Insert vartrace calls for FUN.  */
+
+static unsigned int vartrace_execute (function *fun)
+{
+  basic_block bb;
+  gimple_stmt_iterator gi;
+  bool force = flag_vartrace;
+  bool changed;
+
+  if (lookup_attribute ("vartrace", TYPE_ATTRIBUTES (TREE_TYPE (fun->decl)))
+      || lookup_attribute ("vartrace", DECL_ATTRIBUTES (fun->decl)))
+    force = true;
+
+  changed = instrument_args (fun, force);
+
+  FOR_ALL_BB_FN (bb, fun)
+    for (gi = gsi_start_bb (bb); !gsi_end_p (gi); gsi_next (&gi))
+      {
+ gimple *stmt = gsi_stmt (gi);
+ if (is_gimple_assign (stmt) && !gimple_clobber_p (stmt))
+  changed = instrument_assign (&gi, stmt, changed, force);
+ else if (gimple_code (stmt) == GIMPLE_RETURN)
+  {
+    changed = instrument_return (fun, &gi, stmt, changed, force);
+    // must end basic block
+    break;
+  }
+
+ // instrument something else like CALL?
+ // We assume everything interesting is in a GIMPLE_ASSIGN
+      }
+  return changed ? TODO_update_ssa : 0;
+}
+
+const pass_data pass_data_vartrace =
+{
+  GIMPLE_PASS, /* type */
+  "vartrace", /* name */
+  OPTGROUP_NONE, /* optinfo_flags */
+  TV_NONE, /* tv_id */
+  0, /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  0, /* todo_flags_finish */
+};
+
+class pass_vartrace : public gimple_opt_pass
+{
+public:
+  pass_vartrace (gcc::context *ctxt)
+    : gimple_opt_pass (pass_data_vartrace, ctxt)
+  {}
+
+  virtual opt_pass * clone ()
+    {
+      return new pass_vartrace (m_ctxt);
+    }
+
+  virtual bool gate (function *fun)
+    {
+      // check if vartrace is supported in backend
+      if (!targetm.vartrace_func ||
+  targetm.vartrace_func (integer_type_node) == NULL)
+ return false;
+
+      if (lookup_attribute ("no_vartrace", TYPE_ATTRIBUTES (TREE_TYPE (fun->decl)))
+  || lookup_attribute ("no_vartrace", DECL_ATTRIBUTES (fun->decl)))
+ return false;
+
+      // need to run pass always to check for variable attributes
+      return true;
+    }
+
+  virtual unsigned int execute (function *f) { return vartrace_execute (f); }
+};
+
+gimple_opt_pass *
+make_pass_vartrace (gcc::context *ctxt)
+{
+  return new pass_vartrace (ctxt);
+}
--
2.19.1

Reply | Threaded
Open this post in threaded view
|

[PATCH 3/3] Add tests for the vartrace pass

Andi Kleen-3
From: Andi Kleen <[hidden email]>

gcc/testsuite/:

2018-11-03  Andi Kleen  <[hidden email]>

        * g++.dg/vartrace-3.C: New test.
        * g++.dg/vartrace-ret.C: New test.
        * g++.dg/vartrace-ret2.C: New test.
        * gcc.target/i386/vartrace-1.c: New test.
        * gcc.target/i386/vartrace-10.c: New test.
        * gcc.target/i386/vartrace-11.c: New test.
        * gcc.target/i386/vartrace-12.c: New test.
        * gcc.target/i386/vartrace-13.c: New test.
        * gcc.target/i386/vartrace-14.c: New test.
        * gcc.target/i386/vartrace-15.c: New test.
        * gcc.target/i386/vartrace-16.c: New test.
        * gcc.target/i386/vartrace-2.c: New test.
        * gcc.target/i386/vartrace-3.c: New test.
        * gcc.target/i386/vartrace-4.c: New test.
        * gcc.target/i386/vartrace-5.c: New test.
        * gcc.target/i386/vartrace-6.c: New test.
        * gcc.target/i386/vartrace-7.c: New test.
        * gcc.target/i386/vartrace-8.c: New test.
        * gcc.target/i386/vartrace-9.c: New test.
---
 gcc/testsuite/g++.dg/vartrace-3.C           | 14 +++++++
 gcc/testsuite/g++.dg/vartrace-ret.C         | 17 +++++++++
 gcc/testsuite/g++.dg/vartrace-ret2.C        | 24 ++++++++++++
 gcc/testsuite/gcc.target/i386/vartrace-1.c  | 41 +++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/vartrace-10.c | 13 +++++++
 gcc/testsuite/gcc.target/i386/vartrace-11.c | 16 ++++++++
 gcc/testsuite/gcc.target/i386/vartrace-12.c | 16 ++++++++
 gcc/testsuite/gcc.target/i386/vartrace-13.c | 18 +++++++++
 gcc/testsuite/gcc.target/i386/vartrace-14.c | 17 +++++++++
 gcc/testsuite/gcc.target/i386/vartrace-15.c | 12 ++++++
 gcc/testsuite/gcc.target/i386/vartrace-16.c | 12 ++++++
 gcc/testsuite/gcc.target/i386/vartrace-17.c | 23 ++++++++++++
 gcc/testsuite/gcc.target/i386/vartrace-2.c  |  9 +++++
 gcc/testsuite/gcc.target/i386/vartrace-3.c  |  9 +++++
 gcc/testsuite/gcc.target/i386/vartrace-4.c  | 13 +++++++
 gcc/testsuite/gcc.target/i386/vartrace-5.c  | 11 ++++++
 gcc/testsuite/gcc.target/i386/vartrace-6.c  | 13 +++++++
 gcc/testsuite/gcc.target/i386/vartrace-7.c  | 11 ++++++
 gcc/testsuite/gcc.target/i386/vartrace-8.c  | 11 ++++++
 gcc/testsuite/gcc.target/i386/vartrace-9.c  | 10 +++++
 20 files changed, 310 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/vartrace-3.C
 create mode 100644 gcc/testsuite/g++.dg/vartrace-ret.C
 create mode 100644 gcc/testsuite/g++.dg/vartrace-ret2.C
 create mode 100644 gcc/testsuite/gcc.target/i386/vartrace-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vartrace-10.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vartrace-11.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vartrace-12.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vartrace-13.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vartrace-14.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vartrace-15.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vartrace-16.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vartrace-17.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vartrace-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vartrace-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vartrace-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vartrace-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vartrace-6.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vartrace-7.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vartrace-8.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vartrace-9.c

diff --git a/gcc/testsuite/g++.dg/vartrace-3.C b/gcc/testsuite/g++.dg/vartrace-3.C
new file mode 100644
index 00000000000..13f71cca6d8
--- /dev/null
+++ b/gcc/testsuite/g++.dg/vartrace-3.C
@@ -0,0 +1,14 @@
+/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -mptwrite -fvartrace-args " } */
+/* { dg-final { scan-assembler "ptwrite" } } */
+
+int a;
+int b(int c)
+{
+  if (a)
+    c += 1;
+  else
+    c += b(a);
+  b(c);
+  return 0;
+}
diff --git a/gcc/testsuite/g++.dg/vartrace-ret.C b/gcc/testsuite/g++.dg/vartrace-ret.C
new file mode 100644
index 00000000000..2a8a6753bd3
--- /dev/null
+++ b/gcc/testsuite/g++.dg/vartrace-ret.C
@@ -0,0 +1,17 @@
+/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -mptwrite -fvartrace-returns " } */
+/* { dg-final { scan-assembler-not "ptwrite" } } */
+
+class foo {
+public:
+    short a;
+    short b;
+};
+
+foo f1()
+{
+    foo x = { 1, 2 };
+    return x;
+}
+
+
diff --git a/gcc/testsuite/g++.dg/vartrace-ret2.C b/gcc/testsuite/g++.dg/vartrace-ret2.C
new file mode 100644
index 00000000000..56842d75fb6
--- /dev/null
+++ b/gcc/testsuite/g++.dg/vartrace-ret2.C
@@ -0,0 +1,24 @@
+/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -mptwrite -fvartrace " } */
+/* { dg-final { scan-assembler "ptwrite" } } */
+
+typedef int a;
+enum b
+{ };
+struct ac
+{
+  a operator () (a, a, a, a, a, a);
+};
+struct c
+{
+  ac ag;
+} extern ai[];
+a d;
+void
+l (a e)
+{
+  b f;
+  a g, h, i, j, k;
+  e = d;
+  ai[f].ag (e, g, h, i, j, k);
+}
diff --git a/gcc/testsuite/gcc.target/i386/vartrace-1.c b/gcc/testsuite/gcc.target/i386/vartrace-1.c
new file mode 100644
index 00000000000..ff7a22398b1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vartrace-1.c
@@ -0,0 +1,41 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mptwrite -fvartrace -fvartrace-locals" } */
+/* { dg-final { scan-assembler "ptwrite" } } */
+
+int foo;
+
+extern void f2 (void);
+
+void
+f0 (void)
+{
+  foo += 1;
+}
+
+int
+f3 (int a)
+{
+  return a * 2;
+}
+
+extern void extfunc (int);
+
+int
+f4 (int a, int b)
+{
+  extfunc (a);
+  extfunc (b);
+  return a + b;
+}
+
+void
+f5 (int a)
+{
+}
+
+int
+f (int a, int b)
+{
+  f2 ();
+  return a + b + foo;
+}
diff --git a/gcc/testsuite/gcc.target/i386/vartrace-10.c b/gcc/testsuite/gcc.target/i386/vartrace-10.c
new file mode 100644
index 00000000000..37f2ede23ee
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vartrace-10.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mptwrite -fvartrace" } */
+/* { dg-final { scan-assembler-not "ptwrite" } } */
+
+int a __attribute__ ((no_vartrace));
+
+extern void f2 (int);
+
+void
+f (void)
+{
+  f2 (a);
+}
diff --git a/gcc/testsuite/gcc.target/i386/vartrace-11.c b/gcc/testsuite/gcc.target/i386/vartrace-11.c
new file mode 100644
index 00000000000..3ad792fee34
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vartrace-11.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mptwrite" } */
+/* { dg-final { scan-assembler "ptwrite" } } */
+
+struct foo
+{
+  __attribute__ ((vartrace)) int field;
+};
+
+struct foo a;
+
+int
+f (void)
+{
+  return a.field;
+}
diff --git a/gcc/testsuite/gcc.target/i386/vartrace-12.c b/gcc/testsuite/gcc.target/i386/vartrace-12.c
new file mode 100644
index 00000000000..7f721e3beb7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vartrace-12.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mptwrite" } */
+/* { dg-final { scan-assembler "ptwrite" } } */
+
+struct foo
+{
+  int field;
+} __attribute__ ((vartrace));
+
+struct foo a;
+
+int
+f (void)
+{
+  return a.field;
+}
diff --git a/gcc/testsuite/gcc.target/i386/vartrace-13.c b/gcc/testsuite/gcc.target/i386/vartrace-13.c
new file mode 100644
index 00000000000..94802596d72
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vartrace-13.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mptwrite -fvartrace" } */
+/* { dg-final { scan-assembler-not "ptwrite" } } */
+
+struct foo
+{
+  int field;
+} __attribute__ ((no_vartrace));
+
+struct foo a;
+
+extern void f2 (int);
+
+int
+f (void)
+{
+  f2 (a.field);
+}
diff --git a/gcc/testsuite/gcc.target/i386/vartrace-14.c b/gcc/testsuite/gcc.target/i386/vartrace-14.c
new file mode 100644
index 00000000000..d4db8bf735b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vartrace-14.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mptwrite -fvartrace" } */
+/* { dg-final { scan-assembler-not "ptwrite" } } */
+
+struct foo
+{
+  int __attribute__((no_vartrace)) field;
+};
+
+struct foo a;
+
+extern void f2(int);
+
+int f(void)
+{
+  f2 (a.field);
+}
diff --git a/gcc/testsuite/gcc.target/i386/vartrace-15.c b/gcc/testsuite/gcc.target/i386/vartrace-15.c
new file mode 100644
index 00000000000..02067a016e3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vartrace-15.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-mptwrite -fvartrace" } */
+/* { dg-final { scan-assembler-not "ptwrite" } } */
+
+struct {
+  int __attribute__((vartrace)) x;
+} v;
+
+__attribute__((target("no-ptwrite"))) int f(void)
+{
+  return v.x;
+}
diff --git a/gcc/testsuite/gcc.target/i386/vartrace-16.c b/gcc/testsuite/gcc.target/i386/vartrace-16.c
new file mode 100644
index 00000000000..6d3014af688
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vartrace-16.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "" } */
+/* { dg-final { scan-assembler "ptwrite" } } */
+
+struct {
+  int __attribute__((vartrace)) x;
+} v;
+
+__attribute__((target("ptwrite"))) int f(void)
+{
+  return v.x;
+}
diff --git a/gcc/testsuite/gcc.target/i386/vartrace-17.c b/gcc/testsuite/gcc.target/i386/vartrace-17.c
new file mode 100644
index 00000000000..131db24f19c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vartrace-17.c
@@ -0,0 +1,23 @@
+/* Test optimization for redundant PTWRITEs */
+/* So far XFAIL because we generate redundant PTWRITEs */
+/* { dg-do compile } */
+/* { dg-options "-fvartrace -mptwrite" } */
+/* { dg-final { scan-assembler-times "ptwrite" 8 { xfail *-*-* } } } */
+
+int read_locals(int a, int b)
+{
+  return a+b;
+}
+
+int x;
+
+int global(int a)
+{
+  x += a;
+  return x + a;
+}
+
+int pointer_ref(int *f)
+{
+  return *f++;
+}
diff --git a/gcc/testsuite/gcc.target/i386/vartrace-2.c b/gcc/testsuite/gcc.target/i386/vartrace-2.c
new file mode 100644
index 00000000000..1386d58a450
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vartrace-2.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mptwrite -fvartrace-args" } */
+/* { dg-final { scan-assembler "ptwrite" } } */
+
+int
+f (int a)
+{
+  return a;
+}
diff --git a/gcc/testsuite/gcc.target/i386/vartrace-3.c b/gcc/testsuite/gcc.target/i386/vartrace-3.c
new file mode 100644
index 00000000000..0029660f284
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vartrace-3.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mptwrite -fvartrace-returns" } */
+/* { dg-final { scan-assembler "ptwrite" } } */
+
+int
+f (int a)
+{
+  return a;
+}
diff --git a/gcc/testsuite/gcc.target/i386/vartrace-4.c b/gcc/testsuite/gcc.target/i386/vartrace-4.c
new file mode 100644
index 00000000000..aa09d14d49e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vartrace-4.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mptwrite -fvartrace-reads" } */
+/* { dg-final { scan-assembler "ptwrite" } } */
+
+int a;
+
+extern void f2 (int);
+
+int
+f (void)
+{
+  f2 (a);
+}
diff --git a/gcc/testsuite/gcc.target/i386/vartrace-5.c b/gcc/testsuite/gcc.target/i386/vartrace-5.c
new file mode 100644
index 00000000000..7d7e90d3ead
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vartrace-5.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mptwrite -fvartrace-writes" } */
+/* { dg-final { scan-assembler "ptwrite" } } */
+
+int a;
+
+void
+f (void)
+{
+  a++;
+}
diff --git a/gcc/testsuite/gcc.target/i386/vartrace-6.c b/gcc/testsuite/gcc.target/i386/vartrace-6.c
new file mode 100644
index 00000000000..86b8a06ab64
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vartrace-6.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mptwrite -fvartrace-reads -fvartrace-locals" } */
+/* { dg-final { scan-assembler "ptwrite" } } */
+
+extern void f2 (void);
+
+void
+f (void)
+{
+  int i;
+  for (i = 0; i < 10; i++)
+    f2 ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/vartrace-7.c b/gcc/testsuite/gcc.target/i386/vartrace-7.c
new file mode 100644
index 00000000000..99269d70a75
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vartrace-7.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mptwrite" } */
+/* { dg-final { scan-assembler "ptwrite" } } */
+
+int a __attribute__ ((vartrace));
+
+int
+f (void)
+{
+  return a;
+}
diff --git a/gcc/testsuite/gcc.target/i386/vartrace-8.c b/gcc/testsuite/gcc.target/i386/vartrace-8.c
new file mode 100644
index 00000000000..ceef61944ea
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vartrace-8.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mptwrite" } */
+/* { dg-final { scan-assembler "ptwrite" } } */
+
+int a;
+
+__attribute__ ((vartrace))
+     int f (void)
+{
+  return a;
+}
diff --git a/gcc/testsuite/gcc.target/i386/vartrace-9.c b/gcc/testsuite/gcc.target/i386/vartrace-9.c
new file mode 100644
index 00000000000..9216b0776b9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vartrace-9.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mptwrite -fvartrace" } */
+/* { dg-final { scan-assembler-not "ptwrite" } } */
+
+int a;
+
+__attribute__ ((no_vartrace)) int f (void)
+{
+  return a;
+}
--
2.19.1

Reply | Threaded
Open this post in threaded view
|

Re: [PATCH 1/3] Add PTWRITE builtins for x86

Andi Kleen-5
In reply to this post by Andi Kleen-3
Andi Kleen <[hidden email]> writes:

Ping!

> From: Andi Kleen <[hidden email]>
>
> Add builtins/intrinsics for PTWRITE. PTWRITE is a new instruction on Intel Gemini Lake/
> Goldmont Plus that allows to write values into the Processor Trace log. This allows
> very light weight instrumentation of programs.
>
> The intrinsics are compatible to icc. Automatically enabled for Goldmont Plus.
>
> gcc/:
>
> 2018-11-03  Andi Kleen  <[hidden email]>
>
> * common/config/i386/i386-common.c (OPTION_MASK_ISA_PTWRITE_SET): New.
> (OPTION_MASK_ISA_PTWRITE_UNSET): New.
> (ix86_handle_option): Handle OPT_mptwrite.
> * config/i386/cpuid.h (bit_PTWRITE): Add.
> * config/i386/driver-i386.c (host_detect_local_cpu): Detect ptwrite.
> * config/i386/i386-builtin.def (BDESC): Add ptwrite32/64.
> * config/i386/i386-c.c (ix86_target_macros_internal): Define __PTWRITE__.
> * config/i386/i386.c (ix86_target_string): Handle ptwrite.
> (ix86_option_override_internal): Handle PTA_PTWRITE.
> (ix86_valid_target_attribute_inner_p): Define ptwrite.
> (def_builtin2): Force UINT64 to be 64bit only.
> * config/i386/i386.h (TARGET_PTWRITE): Add.
> (TARGET_PTWRITE_P): Add.
> (PTA_PTWRITE): Add.
> * config/i386/i386.md: Define ptwrite.
> * config/i386/i386.opt: Add -mptwrite.
> * config/i386/immintrin.h (_ptwrite64): Add.
> (_ptwrite32): Add
> * doc/extend.texi: Document __builtin_ia32_ptwrite*.
> * doc/invoke.texi: Document -mptwrite.
>
> gcc/testsuite/ChangeLog:
>
> 2018-11-03  Andi Kleen  <[hidden email]>
>
> * gcc.target/i386/ptwrite1.c: New test.
> * gcc.target/i386/ptwrite2.c: New test.
> ---
>  gcc/common/config/i386/i386-common.c     | 15 ++++++++++++
>  gcc/config/i386/cpuid.h                  |  4 ++++
>  gcc/config/i386/driver-i386.c            | 12 ++++++++++
>  gcc/config/i386/i386-builtin.def         |  4 ++++
>  gcc/config/i386/i386-c.c                 |  2 ++
>  gcc/config/i386/i386.c                   |  9 ++++++-
>  gcc/config/i386/i386.h                   |  5 +++-
>  gcc/config/i386/i386.md                  | 10 ++++++++
>  gcc/config/i386/i386.opt                 |  4 ++++
>  gcc/config/i386/immintrin.h              | 26 ++++++++++++++++++++
>  gcc/doc/extend.texi                      |  9 +++++++
>  gcc/doc/invoke.texi                      |  7 ++++--
>  gcc/testsuite/gcc.target/i386/ptwrite1.c | 30 ++++++++++++++++++++++++
>  gcc/testsuite/gcc.target/i386/ptwrite2.c | 14 +++++++++++
>  14 files changed, 147 insertions(+), 4 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/ptwrite1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/ptwrite2.c
>
> diff --git a/gcc/common/config/i386/i386-common.c b/gcc/common/config/i386/i386-common.c
> index f12806ef3a9..f740995c1e4 100644
> --- a/gcc/common/config/i386/i386-common.c
> +++ b/gcc/common/config/i386/i386-common.c
> @@ -140,6 +140,7 @@ along with GCC; see the file COPYING3.  If not see
>  
>  #define OPTION_MASK_ISA_FSGSBASE_SET OPTION_MASK_ISA_FSGSBASE
>  #define OPTION_MASK_ISA_RDRND_SET OPTION_MASK_ISA_RDRND
> +#define OPTION_MASK_ISA_PTWRITE_SET OPTION_MASK_ISA_PTWRITE
>  #define OPTION_MASK_ISA_F16C_SET \
>    (OPTION_MASK_ISA_F16C | OPTION_MASK_ISA_AVX_SET)
>  #define OPTION_MASK_ISA_MWAITX_SET OPTION_MASK_ISA_MWAITX
> @@ -267,6 +268,7 @@ along with GCC; see the file COPYING3.  If not see
>  
>  #define OPTION_MASK_ISA_FSGSBASE_UNSET OPTION_MASK_ISA_FSGSBASE
>  #define OPTION_MASK_ISA_RDRND_UNSET OPTION_MASK_ISA_RDRND
> +#define OPTION_MASK_ISA_PTWRITE_UNSET OPTION_MASK_ISA_PTWRITE
>  #define OPTION_MASK_ISA_F16C_UNSET OPTION_MASK_ISA_F16C
>  
>  #define OPTION_MASK_ISA_GENERAL_REGS_ONLY_UNSET \
> @@ -1125,6 +1127,19 @@ ix86_handle_option (struct gcc_options *opts,
>   }
>        return true;
>  
> +    case OPT_mptwrite:
> +      if (value)
> + {
> +  opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_PTWRITE_SET;
> +  opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA_PTWRITE_SET;
> + }
> +      else
> + {
> +  opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA_PTWRITE_UNSET;
> +  opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA_PTWRITE_UNSET;
> + }
> +      return true;
> +
>      case OPT_mf16c:
>        if (value)
>   {
> diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h
> index 7e9e2d153dc..2e6d4a55602 100644
> --- a/gcc/config/i386/cpuid.h
> +++ b/gcc/config/i386/cpuid.h
> @@ -126,6 +126,10 @@
>  #define bit_XSAVEC (1 << 1)
>  #define bit_XSAVES (1 << 3)
>  
> +/* PT sub leaf (%eax == 14, %ecx == 0) */
> +/* %ebx */
> +#define bit_PTWRITE (1 << 4)
> +
>  /* Signatures for different CPU implementations as returned in uses
>     of cpuid with level 0.  */
>  #define signature_AMD_ebx 0x68747541
> diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c
> index 8c830bde1dd..423b1c3827f 100644
> --- a/gcc/config/i386/driver-i386.c
> +++ b/gcc/config/i386/driver-i386.c
> @@ -427,6 +427,8 @@ const char *host_detect_local_cpu (int argc, const char **argv)
>    unsigned int has_waitpkg = 0;
>    unsigned int has_cldemote = 0;
>  
> +  unsigned int has_ptwrite = 0;
> +
>    bool arch;
>  
>    unsigned int l2sizekb = 0;
> @@ -542,6 +544,13 @@ const char *host_detect_local_cpu (int argc, const char **argv)
>        has_xsaves = eax & bit_XSAVES;
>      }
>  
> +  if (max_level >= 0x14)
> +    {
> +      __cpuid_count (0x14, 0, eax, ebx, ecx, edx);
> +
> +      has_ptwrite = ebx & bit_PTWRITE;
> +    }
> +
>    /* Check cpuid level of extended features.  */
>    __cpuid (0x80000000, ext_level, ebx, ecx, edx);
>  
> @@ -1124,6 +1133,8 @@ const char *host_detect_local_cpu (int argc, const char **argv)
>        const char *movdir64b = has_movdir64b ? " -mmovdir64b" : " -mno-movdir64b";
>        const char *waitpkg = has_waitpkg ? " -mwaitpkg" : " -mno-waitpkg";
>        const char *cldemote = has_cldemote ? " -mcldemote" : " -mno-cldemote";
> +      const char *ptwrite = has_ptwrite ? " -mptwrite" : " -mno-ptwrite";
> +
>        options = concat (options, mmx, mmx3dnow, sse, sse2, sse3, ssse3,
>   sse4a, cx16, sahf, movbe, aes, sha, pclmul,
>   popcnt, abm, lwp, fma, fma4, xop, bmi, sgx, bmi2,
> @@ -1137,6 +1148,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
>   clwb, mwaitx, clzero, pku, rdpid, gfni, shstk,
>   avx512vbmi2, avx512vnni, vaes, vpclmulqdq,
>   avx512bitalg, movdiri, movdir64b, waitpkg, cldemote,
> + ptwrite,
>   NULL);
>      }
>  
> diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
> index df0f7e975ac..ca3f357228f 100644
> --- a/gcc/config/i386/i386-builtin.def
> +++ b/gcc/config/i386/i386-builtin.def
> @@ -2879,6 +2879,10 @@ BDESC_FIRST (special_args2, SPECIAL_ARGS2,
>   OPTION_MASK_ISA_WBNOINVD, CODE_FOR_wbnoinvd, "__builtin_ia32_wbnoinvd", IX86_BUILTIN_WBNOINVD, UNKNOWN, (int) VOID_FTYPE_VOID)
>  BDESC (OPTION_MASK_ISA_MOVDIR64B, CODE_FOR_nothing, "__builtin_ia32_movdir64b", IX86_BUILTIN_MOVDIR64B, UNKNOWN, (int) VOID_FTYPE_PVOID_PCVOID)
>  
> +/* PTWRITE */
> +BDESC (OPTION_MASK_ISA_PTWRITE, CODE_FOR_ptwritesi, "__builtin_ia32_ptwrite32", IX86_BUILTIN_PTWRITE32, UNKNOWN, (int) VOID_FTYPE_UNSIGNED)
> +BDESC (OPTION_MASK_ISA_PTWRITE, CODE_FOR_ptwritedi, "__builtin_ia32_ptwrite64", IX86_BUILTIN_PTWRITE64, UNKNOWN, (int) VOID_FTYPE_UINT64)
> +
>  BDESC_END (SPECIAL_ARGS2, MULTI_ARG)
>  
>  /* FMA4 and XOP.  */
> diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c
> index 005e1a5b308..4661d00f85d 100644
> --- a/gcc/config/i386/i386-c.c
> +++ b/gcc/config/i386/i386-c.c
> @@ -532,6 +532,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
>      def_or_undef (parse_in, "__WAITPKG__");
>    if (isa_flag2 & OPTION_MASK_ISA_CLDEMOTE)
>      def_or_undef (parse_in, "__CLDEMOTE__");
> +  if (isa_flag2 & OPTION_MASK_ISA_PTWRITE)
> +    def_or_undef (parse_in, "__PTWRITE__");
>    if (TARGET_IAMCU)
>      {
>        def_or_undef (parse_in, "__iamcu");
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index 963c7fcbb34..490bb6292a8 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -2793,7 +2793,8 @@ ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_INT isa2,
>      { "-mmwaitx", OPTION_MASK_ISA_MWAITX },
>      { "-mmovdir64b", OPTION_MASK_ISA_MOVDIR64B },
>      { "-mwaitpkg", OPTION_MASK_ISA_WAITPKG },
> -    { "-mcldemote", OPTION_MASK_ISA_CLDEMOTE }
> +    { "-mcldemote", OPTION_MASK_ISA_CLDEMOTE },
> +    { "-mptwrite", OPTION_MASK_ISA_PTWRITE }
>    };
>    static struct ix86_target_opts isa_opts[] =
>    {
> @@ -3875,6 +3876,9 @@ ix86_option_override_internal (bool main_args_p,
>   if (((processor_alias_table[i].flags & PTA_WBNOINVD) != 0)
>      && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_WBNOINVD))
>    opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_WBNOINVD;
> + if (((processor_alias_table[i].flags & PTA_PTWRITE) != 0)
> +    && !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA_PTWRITE))
> +  opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_PTWRITE;
>  
>   if ((processor_alias_table[i].flags
>     & (PTA_PREFETCH_SSE | PTA_SSE)) != 0)
> @@ -5077,6 +5081,7 @@ ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
>      IX86_ATTR_ISA ("movdir64b", OPT_mmovdir64b),
>      IX86_ATTR_ISA ("waitpkg", OPT_mwaitpkg),
>      IX86_ATTR_ISA ("cldemote", OPT_mcldemote),
> +    IX86_ATTR_ISA ("ptwrite",   OPT_mptwrite),
>  
>      /* enum options */
>      IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
> @@ -30180,6 +30185,8 @@ def_builtin2 (HOST_WIDE_INT mask, const char *name,
>    tree decl = NULL_TREE;
>  
>    ix86_builtins_isa[(int) code].isa2 = mask;
> +  if (tcode == VOID_FTYPE_UINT64)
> +    ix86_builtins_isa[(int) code].isa = OPTION_MASK_ISA_64BIT;
>  
>    if (mask == 0
>        || (mask & ix86_isa_flags2) != 0
> diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
> index 01d49a7263b..39d3e59c8dd 100644
> --- a/gcc/config/i386/i386.h
> +++ b/gcc/config/i386/i386.h
> @@ -191,6 +191,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
>  #define TARGET_WAITPKG_P(x) TARGET_ISA_WAITPKG_P(x)
>  #define TARGET_CLDEMOTE TARGET_ISA_CLDEMOTE
>  #define TARGET_CLDEMOTE_P(x) TARGET_ISA_CLDEMOTE_P(x)
> +#define TARGET_PTWRITE TARGET_ISA_PTWRITE
> +#define TARGET_PTWRITE_P(x) TARGET_ISA_PTWRITE_P(x)
>  
>  #define TARGET_LP64 TARGET_ABI_64
>  #define TARGET_LP64_P(x) TARGET_ABI_64_P(x)
> @@ -2354,6 +2356,7 @@ const wide_int_bitmask PTA_RDPID (0, HOST_WIDE_INT_1U << 6);
>  const wide_int_bitmask PTA_PCONFIG (0, HOST_WIDE_INT_1U << 7);
>  const wide_int_bitmask PTA_WBNOINVD (0, HOST_WIDE_INT_1U << 8);
>  const wide_int_bitmask PTA_WAITPKG (0, HOST_WIDE_INT_1U << 9);
> +const wide_int_bitmask PTA_PTWRITE (0, HOST_WIDE_INT_1U << 10);
>  
>  const wide_int_bitmask PTA_CORE2 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2
>    | PTA_SSE3 | PTA_SSSE3 | PTA_CX16 | PTA_FXSR;
> @@ -2389,7 +2392,7 @@ const wide_int_bitmask PTA_GOLDMONT = PTA_SILVERMONT | PTA_SHA | PTA_XSAVE
>    | PTA_RDSEED | PTA_XSAVEC | PTA_XSAVES | PTA_CLFLUSHOPT | PTA_XSAVEOPT
>    | PTA_FSGSBASE;
>  const wide_int_bitmask PTA_GOLDMONT_PLUS = PTA_GOLDMONT | PTA_RDPID
> -  | PTA_SGX;
> +  | PTA_SGX | PTA_PTWRITE;
>  const wide_int_bitmask PTA_TREMONT = PTA_GOLDMONT_PLUS | PTA_CLWB
>    | PTA_GFNI;
>  const wide_int_bitmask PTA_KNM = PTA_KNL | PTA_AVX5124VNNIW
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index 7fb2b144f47..bdc39456106 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -292,6 +292,8 @@
>  
>    ;; For Speculation Barrier support
>    UNSPECV_SPECULATION_BARRIER
> +
> +  UNSPECV_PTWRITE
>  ])
>  
>  ;; Constants to represent rounding modes in the ROUND instruction
> @@ -19498,6 +19500,14 @@
>    [(set_attr "type" "other")
>     (set_attr "prefix_extra" "2")])
>  
> +(define_insn "ptwrite<mode>"
> +  [(unspec_volatile [(match_operand:SWI48 0 "register_operand" "r")]
> +    UNSPECV_PTWRITE)]
> +  "TARGET_PTWRITE"
> +  "ptwrite\t%0"
> +  [(set_attr "type" "other")
> +   (set_attr "prefix_extra" "2")])
> +
>  (define_insn "rdrand<mode>_1"
>    [(set (match_operand:SWI248 0 "register_operand" "=r")
>   (unspec_volatile:SWI248 [(const_int 0)] UNSPECV_RDRAND))
> diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
> index e7fbf9b6f99..1705815a2ec 100644
> --- a/gcc/config/i386/i386.opt
> +++ b/gcc/config/i386/i386.opt
> @@ -785,6 +785,10 @@ mwbnoinvd
>  Target Report Mask(ISA_WBNOINVD) Var(ix86_isa_flags2) Save
>  Support WBNOINVD built-in functions and code generation.
>  
> +mptwrite
> +Target Report Mask(ISA_PTWRITE) Var(ix86_isa_flags2) Save
> +Support PTWRITE built-in functions and code generation.
> +
>  msgx
>  Target Report Mask(ISA_SGX) Var(ix86_isa_flags2) Save
>  Support SGX built-in functions and code generation.
> diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h
> index 344e92c745a..b52ab91b4d5 100644
> --- a/gcc/config/i386/immintrin.h
> +++ b/gcc/config/i386/immintrin.h
> @@ -251,4 +251,30 @@ _rdrand64_step (unsigned long long *__P)
>  
>  #endif /* __x86_64__  */
>  
> +#ifndef __PTWRITE__
> +#pragma GCC push_options
> +#pragma GCC target("ptwrite")
> +#define __DISABLE_PTWRITE__
> +#endif
> +
> +#ifdef __x86_64__
> +extern __inline void
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +_ptwrite64 (unsigned long long __B)
> +{
> +  __builtin_ia32_ptwrite64 (__B);
> +}
> +#endif /* __x86_64__ */
> +
> +extern __inline void
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +_ptwrite32 (unsigned __B)
> +{
> +  __builtin_ia32_ptwrite32 (__B);
> +}
> +#ifdef __DISABLE_PTWRITE__
> +#undef __DISABLE_PTWRITE__
> +#pragma GCC pop_options
> +#endif /* __DISABLE_PTWRITE__ */
> +
>  #endif /* _IMMINTRIN_H_INCLUDED */
> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
> index e2b9ee11a54..1eca009e255 100644
> --- a/gcc/doc/extend.texi
> +++ b/gcc/doc/extend.texi
> @@ -21331,6 +21331,15 @@ unsigned int __builtin_ia32_rdrand32_step (unsigned int *)
>  unsigned int __builtin_ia32_rdrand64_step (unsigned long long *)
>  @end smallexample
>  
> +The following built-in function is available when @option{-mptwrite} is
> +used.  All of them generate the machine instruction that is part of the
> +name.
> +
> +@smallexample
> +void __builtin_ia32_ptwrite32 (unsigned)
> +void __builtin_ia32_ptwrite64 (unsigned long long)
> +@end smallexample
> +
>  The following built-in functions are available when @option{-msse4a} is used.
>  All of them generate the machine instruction that is part of the name.
>  
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index e290128f535..cb5bc7bafc5 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -1281,7 +1281,7 @@ See RS/6000 and PowerPC Options.
>  -mmmx  -msse  -msse2  -msse3  -mssse3  -msse4.1  -msse4.2  -msse4  -mavx @gol
>  -mavx2  -mavx512f  -mavx512pf  -mavx512er  -mavx512cd  -mavx512vl @gol
>  -mavx512bw  -mavx512dq  -mavx512ifma  -mavx512vbmi  -msha  -maes @gol
> --mpclmul  -mfsgsbase  -mrdrnd  -mf16c  -mfma -mpconfig -mwbnoinvd @gol
> +-mpclmul  -mfsgsbase  -mrdrnd  -mf16c  -mfma -mpconfig -mwbnoinvd -mptwrite @gol
>  -mprefetchwt1  -mclflushopt  -mxsavec  -mxsaves @gol
>  -msse4a  -m3dnow  -m3dnowa  -mpopcnt  -mabm  -mbmi  -mtbm  -mfma4  -mxop @gol
>  -mlzcnt  -mbmi2  -mfxsr  -mxsave  -mxsaveopt  -mrtm  -mlwp @gol
> @@ -27815,6 +27815,9 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
>  @itemx -mfsgsbase
>  @opindex mfsgsbase
>  @need 200
> +@itemx -mptwrite
> +@opindex mptwrite
> +@need 200
>  @itemx -mrdrnd
>  @opindex mrdrnd
>  @need 200
> @@ -27923,7 +27926,7 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
>  @opindex mcldemote
>  These switches enable the use of instructions in the MMX, SSE,
>  SSE2, SSE3, SSSE3, SSE4.1, AVX, AVX2, AVX512F, AVX512PF, AVX512ER, AVX512CD,
> -SHA, AES, PCLMUL, FSGSBASE, RDRND, F16C, FMA, SSE4A, FMA4, XOP, LWP, ABM,
> +SHA, AES, PCLMUL, FSGSBASE, PTWRITE, RDRND, F16C, FMA, SSE4A, FMA4, XOP, LWP, ABM,
>  AVX512VL, AVX512BW, AVX512DQ, AVX512IFMA, AVX512VBMI, BMI, BMI2, VAES, WAITPKG,
>  FXSR, XSAVE, XSAVEOPT, LZCNT, RTM, MWAITX, PKU, IBT, SHSTK, AVX512VBMI2,
>  GFNI, VPCLMULQDQ, AVX512BITALG, MOVDIRI, MOVDIR64B,
> diff --git a/gcc/testsuite/gcc.target/i386/ptwrite1.c b/gcc/testsuite/gcc.target/i386/ptwrite1.c
> new file mode 100644
> index 00000000000..e09028ed428
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/ptwrite1.c
> @@ -0,0 +1,30 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mptwrite" } */
> +/* { dg-final { scan-assembler "ptwrite" } } */
> +
> +void ptwrite1(int a)
> +{
> +  __builtin_ia32_ptwrite32 (a);
> +}
> +
> +#ifdef __x86_64__
> +void ptwrite2(unsigned long b)
> +{
> +  __builtin_ia32_ptwrite64 (b);
> +}
> +
> +void ptwrite3(unsigned char b)
> +{
> +  __builtin_ia32_ptwrite64 (b);
> +}
> +
> +void ptwrite4(unsigned short b)
> +{
> +  __builtin_ia32_ptwrite64 (b);
> +}
> +#endif
> +
> +void ptwrite5(unsigned short b)
> +{
> +  __builtin_ia32_ptwrite32 (b);
> +}
> diff --git a/gcc/testsuite/gcc.target/i386/ptwrite2.c b/gcc/testsuite/gcc.target/i386/ptwrite2.c
> new file mode 100644
> index 00000000000..299c6511ef4
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/ptwrite2.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mptwrite " } */
> +/* { dg-final { scan-assembler "ptwrite.*r" } } */
> +/* { dg-final { scan-assembler "ptwrite.*e" } } */
> +
> +#include <x86intrin.h>
> +
> +void ptwrite1(void)
> +{
> +  _ptwrite32 (1);
> +#ifdef __x86_64__
> +  _ptwrite64 (2);
> +#endif
> +}
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH 1/3] Add PTWRITE builtins for x86

Uros Bizjak-3
In reply to this post by Andi Kleen-3
Hello!

> From: Andi Kleen <[hidden email]>
>
> Add builtins/intrinsics for PTWRITE. PTWRITE is a new instruction on Intel Gemini Lake/
> Goldmont Plus that allows to write values into the Processor Trace log. This allows
> very light weight instrumentation of programs.
>
> The intrinsics are compatible to icc. Automatically enabled for Goldmont Plus.
>
> gcc/:
>
> 2018-11-03  Andi Kleen  <[hidden email]>
>
> * common/config/i386/i386-common.c (OPTION_MASK_ISA_PTWRITE_SET): New.
> (OPTION_MASK_ISA_PTWRITE_UNSET): New.
> (ix86_handle_option): Handle OPT_mptwrite.
> * config/i386/cpuid.h (bit_PTWRITE): Add.
> * config/i386/driver-i386.c (host_detect_local_cpu): Detect ptwrite.
> * config/i386/i386-builtin.def (BDESC): Add ptwrite32/64.
> * config/i386/i386-c.c (ix86_target_macros_internal): Define __PTWRITE__.
> * config/i386/i386.c (ix86_target_string): Handle ptwrite.
> (ix86_option_override_internal): Handle PTA_PTWRITE.
> (ix86_valid_target_attribute_inner_p): Define ptwrite.
> (def_builtin2): Force UINT64 to be 64bit only.
> * config/i386/i386.h (TARGET_PTWRITE): Add.
> (TARGET_PTWRITE_P): Add.
> (PTA_PTWRITE): Add.
> * config/i386/i386.md: Define ptwrite.
> * config/i386/i386.opt: Add -mptwrite.
> * config/i386/immintrin.h (_ptwrite64): Add.
> (_ptwrite32): Add
> * doc/extend.texi: Document __builtin_ia32_ptwrite*.
> * doc/invoke.texi: Document -mptwrite.
>
> gcc/testsuite/ChangeLog:
>
> 2018-11-03  Andi Kleen  <[hidden email]>
>
> * gcc.target/i386/ptwrite1.c: New test.
> * gcc.target/i386/ptwrite2.c: New test.

OK for x86 part (that is only PATCH 1/3). It looks that this part can
go to mainline as an independent patch from other patches in serie.

diff --git a/gcc/testsuite/gcc.target/i386/ptwrite2.c
b/gcc/testsuite/gcc.target/i386/ptwrite2.c
new file mode 100644
index 00000000000..299c6511ef4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/ptwrite2.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mptwrite " } */
+/* { dg-final { scan-assembler "ptwrite.*r" } } */
+/* { dg-final { scan-assembler "ptwrite.*e" } } */

Better use \[^\n\r\] instead of .* to avoid unwanted multi-line matches.

Thanks,
Uros.
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH 1/3] Add PTWRITE builtins for x86

Andi Kleen-3
> OK for x86 part (that is only PATCH 1/3). It looks that this part can
> go to mainline as an independent patch from other patches in serie.

Thanks.

Note even 2/3 has a small i386 specific part. Would be good if you
could take a look at that part.

-Andi
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH 1/3] Add PTWRITE builtins for x86

Uros Bizjak-3
On Thu, Nov 8, 2018 at 7:03 PM Andi Kleen <[hidden email]> wrote:
>
> > OK for x86 part (that is only PATCH 1/3). It looks that this part can
> > go to mainline as an independent patch from other patches in serie.
>
> Thanks.
>
> Note even 2/3 has a small i386 specific part. Would be good if you
> could take a look at that part.

It is "only" a hook (LGTM, BTW), but as part of functional part of the
patch, you will need an approval for the design and implementation of
the middle-end functionality first.

Uros.

> -Andi
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH 2/3] Add a pass to automatically add ptwrite instrumentation

Richard Biener-2
In reply to this post by Andi Kleen-3
On Sun, Nov 4, 2018 at 7:33 AM Andi Kleen <[hidden email]> wrote:

>
> From: Andi Kleen <[hidden email]>
>
> Add a new pass to automatically instrument changes to variables
> with the new PTWRITE instruction on x86. PTWRITE writes a 4 or 8 byte
> field into an Processor Trace log, which allows log over head
> logging of informatin.
>
> This allows to reconstruct how values later, which can be useful for
> debugging or other analysis of the program behavior. With the compiler
> support this can be done with without having to manually add instrumentation
> to the code.
>
> Using dwarf information this can be later mapped back to the variables.
>
> There are new options to enable instrumentation for different types,
> and also a new attribute to control analysis fine grained per
> function or variable level. The attributes can be set on both
> the variable and the type level, and also on structure fields.
> This allows to enable tracing only for specific code in large
> programs.
>
> The pass is generic, but only the x86 backend enables the necessary
> hooks. When the backend enables the necessary hooks (with -mptwrite)
> there is an additional pass that looks through the code for
> attribute vartrace enabled functions or variables.
>
> The -fvartrace-locals options is experimental: it works, but it
> generates redundant ptwrites because the pass doesn't use
> the SSA information to minimize instrumentation. This could be optimized
> later.
>
> Currently the code can be tested with SDE, or on a Intel
> Gemini Lake system with a new enough Linux kernel (v4.10+)
> that supports PTWRITE for PT. Linux perf can be used to
> record the values
>
> perf record -e intel_pt/ptw=1,branch=0/ program
> perf script --itrace=crw -F +synth ...
>
> I have an experimential version of perf that can also use
> dwarf information to symbolize many[1] values back to their variable
> names. So far it is not in standard perf, but available at
>
> https://git.kernel.org/pub/scm/linux/kernel/git/ak/linux-misc.git/log/?h=perf/var-resolve-4
>
> It is currently not able to decode all variable locations to names,
> but a large subset.
>
> Longer term hopefully gdb will support this information too.
>
> The CPU can potentially generate very data high bandwidths when
> code doing a lot of computation is heavily instrumented.
> This can cause some data loss in both the CPU and also in perf
> logging the data when the disk cannot keep up.
>
> Running some larger workloads most workloads do not cause
> CPU level overflows, but I've seen it with -fvartrace
> with crafty, and with more workloads with -fvartrace-locals.
>
> Recommendation is to not fully instrument programs,
> but only areas of interest either at the file level or using
> the attributes.
>
> The other thing is that perf and the disk often cannot keep up
> with the data bandwidth for longer computations. In this case
> it's possible to use perf snapshot mode (add --snapshot
> to the command line above). The data will be only logged to
> a memory ring buffer then, and only dump the buffers on events
> of interest by sending SIGUSR2 to the perf binrary.
>
> In the future this will be hopefully better supported with
> core files and gdb.
>
> Passes bootstrap and test suite on x86_64-linux, also
> bootstrapped and tested gcc itself with full -fvartrace
> and -fvartrace-locals instrumentation.

So how is this supposed to be used?  I guess in a
edit-debug cycle and not for production code?

What do you actually write with PTWRITE?  I suppose
you need to keep a ID to something mapping somewhere
so you can make sense of the perf records?

Few comments inline below, but I'm not sure if this
whole thing is interesting for GCC (as opposed to being
a instrumentation plugin)

> gcc/:
>
> 2018-11-03  Andi Kleen  <[hidden email]>
>
>         * Makefile.in: Add tree-vartrace.o.
>         * common.opt: Add -fvartrace, -fvartrace-returns,
>         -fvartrace-args, -fvartrace-reads, -fvartrace-writes,
>         -fvartrace-locals
>         * config/i386/i386.c (ix86_vartrace_func): Add.
>         (TARGET_VARTRACE_FUNC): Add.
>         * doc/extend.texi: Document vartrace/no_vartrace
>         attributes.
>         * doc/invoke.texi: Document -fvartrace, -fvartrace-returns,
>         -fvartrace-args, -fvartrace-reads, -fvartrace-writes,
>         -fvartrace-locals
>         * doc/tm.texi (TARGET_VARTRACE_FUNC): Add.
>         * passes.def: Add vartrace pass.
>         * target.def (vartrace_func): Add.
>         * tree-pass.h (make_pass_vartrace): Add.
>         * tree-vartrace.c: New file to implement vartrace pass.
>
> gcc/c-family/:
>
> 2018-11-03  Andi Kleen  <[hidden email]>
>
>         * c-attribs.c (handle_vartrace_attribute): New function.
>
> config/:
>
> 2018-11-03  Andi Kleen  <[hidden email]>
>
>         * bootstrap-vartrace.mk: New.
>         * bootstrap-vartrace-locals.mk: New.
> ---
>  config/bootstrap-vartrace-locals.mk |   3 +
>  config/bootstrap-vartrace.mk        |   3 +
>  gcc/Makefile.in                     |   1 +
>  gcc/c-family/c-attribs.c            |  23 ++
>  gcc/common.opt                      |  24 ++
>  gcc/config/i386/i386.c              |  16 +
>  gcc/doc/extend.texi                 |  13 +
>  gcc/doc/invoke.texi                 |  29 ++
>  gcc/doc/tm.texi                     |   4 +
>  gcc/doc/tm.texi.in                  |   2 +
>  gcc/passes.def                      |   1 +
>  gcc/target.def                      |   7 +
>  gcc/tree-pass.h                     |   1 +
>  gcc/tree-vartrace.c                 | 463 ++++++++++++++++++++++++++++
>  14 files changed, 590 insertions(+)
>  create mode 100644 config/bootstrap-vartrace-locals.mk
>  create mode 100644 config/bootstrap-vartrace.mk
>  create mode 100644 gcc/tree-vartrace.c
>
> diff --git a/config/bootstrap-vartrace-locals.mk b/config/bootstrap-vartrace-locals.mk
> new file mode 100644
> index 00000000000..c6c79e21120
> --- /dev/null
> +++ b/config/bootstrap-vartrace-locals.mk
> @@ -0,0 +1,3 @@
> +STAGE2_CFLAGS += -mptwrite -fvartrace -fvartrace-locals
> +STAGE3_CFLAGS += -mptwrite -fvartrace -fvartrace-locals
> +STAGE4_CFLAGS += -mptwrite -fvartrace -fvartrace-locals
> diff --git a/config/bootstrap-vartrace.mk b/config/bootstrap-vartrace.mk
> new file mode 100644
> index 00000000000..e29824d799b
> --- /dev/null
> +++ b/config/bootstrap-vartrace.mk
> @@ -0,0 +1,3 @@
> +STAGE2_CFLAGS += -mptwrite -fvartrace
> +STAGE3_CFLAGS += -mptwrite -fvartrace
> +STAGE4_CFLAGS += -mptwrite -fvartrace
> diff --git a/gcc/Makefile.in b/gcc/Makefile.in
> index 719a516c356..46aa4800e57 100644
> --- a/gcc/Makefile.in
> +++ b/gcc/Makefile.in
> @@ -1594,6 +1594,7 @@ OBJS = \
>         tree-vectorizer.o \
>         tree-vector-builder.o \
>         tree-vrp.o \
> +       tree-vartrace.o \
>         tree.o \
>         typed-splay-tree.o \
>         unique-ptr-tests.o \
> diff --git a/gcc/c-family/c-attribs.c b/gcc/c-family/c-attribs.c
> index 4416b5042f7..66bbd87921f 100644
> --- a/gcc/c-family/c-attribs.c
> +++ b/gcc/c-family/c-attribs.c
> @@ -104,6 +104,8 @@ static tree handle_tls_model_attribute (tree *, tree, tree, int,
>                                         bool *);
>  static tree handle_no_instrument_function_attribute (tree *, tree,
>                                                      tree, int, bool *);
> +static tree handle_vartrace_attribute (tree *, tree,
> +                                                    tree, int, bool *);
>  static tree handle_no_profile_instrument_function_attribute (tree *, tree,
>                                                              tree, int, bool *);
>  static tree handle_malloc_attribute (tree *, tree, tree, int, bool *);
> @@ -325,6 +327,12 @@ const struct attribute_spec c_common_attribute_table[] =
>    { "no_instrument_function", 0, 0, true,  false, false, false,
>                               handle_no_instrument_function_attribute,
>                               NULL },
> +  { "vartrace",              0, 0, false,  false, false, false,
> +                             handle_vartrace_attribute,
> +                             NULL },
> +  { "no_vartrace",           0, 0, false,  false, false, false,
> +                             handle_vartrace_attribute,
> +                             NULL },
>    { "no_profile_instrument_function",  0, 0, true, false, false, false,
>                               handle_no_profile_instrument_function_attribute,
>                               NULL },
> @@ -767,6 +775,21 @@ handle_no_sanitize_undefined_attribute (tree *node, tree name, tree, int,
>    return NULL_TREE;
>  }
>
> +/* Handle "vartrace"/"no_vartrace" attributes; arguments as in
> +   struct attribute_spec.handler.  */
> +
> +static tree
> +handle_vartrace_attribute (tree *node, tree, tree, int flags,
> +                          bool *)
> +{
> +  if (TYPE_P (*node) && !(flags & (int) ATTR_FLAG_TYPE_IN_PLACE))
> +    *node = build_variant_type_copy (*node);

I don't think you want the attribute on types.  As far as I understood your
descriptions it should only be on variables and functions.

> +  /* Can apply to types, functions, variables.  */
> +  /* We lookup it up later with lookup_attribute.  */
> +  return NULL_TREE;
> +}
> +
>  /* Handle an "asan odr indicator" attribute; arguments as in
>     struct attribute_spec.handler.  */
>
> diff --git a/gcc/common.opt b/gcc/common.opt
> index 2971dc21b1f..930acf40588 100644
> --- a/gcc/common.opt
> +++ b/gcc/common.opt
> @@ -2811,6 +2811,30 @@ ftree-scev-cprop
>  Common Report Var(flag_tree_scev_cprop) Init(1) Optimization
>  Enable copy propagation of scalar-evolution information.
>
> +fvartrace
> +Common Report Var(flag_vartrace)
> +Generate all variable tracking instrumentations, except for locals.
> +
> +fvartrace-returns
> +Common Report Var(flag_vartrace_returns)
> +Generate variable tracking instructions for function returns.
> +
> +fvartrace-args
> +Common Report Var(flag_vartrace_args)
> +Generate variable tracking instructions for function arguments.
> +
> +fvartrace-reads
> +Common Report Var(flag_vartrace_reads)
> +Generate variable tracking instructions for reads.
> +
> +fvartrace-writes
> +Common Report Var(flag_vartrace_writes)
> +Generate variable tracking instructions for writes.
> +
> +fvartrace-locals
> +Common Report Var(flag_vartrace_locals)
> +Generate variable tracking instructions for locals.
> +

Please use -fvartrace= and sth like -fsantitize=.

>  ; -fverbose-asm causes extra commentary information to be produced in
>  ; the generated assembly code (to make it more readable).  This option
>  ; is generally only of use to those who actually need to read the
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index 490bb6292a8..4337121c239 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -31873,6 +31873,19 @@ ix86_mangle_function_version_assembler_name (tree decl, tree id)
>  }
>
>
> +static tree
> +ix86_vartrace_func (tree type)

All functions need comments describing them, their arguments
and return values.

> +{
> +  if (!(ix86_isa_flags2 & OPTION_MASK_ISA_PTWRITE))
> +    return NULL;
> +  if (TYPE_PRECISION (type) == 32)
> +    return ix86_builtins [(int) IX86_BUILTIN_PTWRITE32];
> +  else if (TYPE_PRECISION (type) == 64)
> +    return ix86_builtins [(int) IX86_BUILTIN_PTWRITE64];
> +  else
> +    return NULL;
> +}
> +
>  static tree
>  ix86_mangle_decl_assembler_name (tree decl, tree id)
>  {
> @@ -50849,6 +50862,9 @@ ix86_run_selftests (void)
>  #undef TARGET_ASAN_SHADOW_OFFSET
>  #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
>
> +#undef TARGET_VARTRACE_FUNC
> +#define TARGET_VARTRACE_FUNC ix86_vartrace_func
> +
>  #undef TARGET_GIMPLIFY_VA_ARG_EXPR
>  #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
>
> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
> index 1eca009e255..08286aa4591 100644
> --- a/gcc/doc/extend.texi
> +++ b/gcc/doc/extend.texi
> @@ -3193,6 +3193,13 @@ the standard C library can be guaranteed not to throw an exception
>  with the notable exceptions of @code{qsort} and @code{bsearch} that
>  take function pointer arguments.
>
> +@item no_vartrace
> +@cindex @code{no_vartrace} function or variable attribute
> +Disable data tracing for the function or variable or structured field
> +marked with this attribute. Applies to types. Currently implemented
> +for x86 when the @option{ptwrite} target option is enabled for systems
> +that support the @code{PTWRITE} instruction.

How does it apply to types?

>  @item optimize (@var{level}, @dots{})
>  @item optimize (@var{string}, @dots{})
>  @cindex @code{optimize} function attribute
> @@ -3454,6 +3461,12 @@ When applied to a member function of a C++ class template, the
>  attribute also means that the function is instantiated if the
>  class itself is instantiated.
>
> +@item vartrace
> +@cindex @code{vartrace} function or variable attribute
> +Enable data tracing for the function or variable or structure field
> +marked with this attribute. Applies to types. Will not trace locals,
> +but arguments, returns, globals, pointer references.
> +
>  @item visibility ("@var{visibility_type}")
>  @cindex @code{visibility} function attribute
>  This attribute affects the linkage of the declaration to which it is attached.
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index cb5bc7bafc5..2f10b3c1023 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -2720,6 +2720,35 @@ Don't use the @code{__cxa_get_exception_ptr} runtime routine.  This
>  causes @code{std::uncaught_exception} to be incorrect, but is necessary
>  if the runtime routine is not available.
>
> +@item -fvartrace
> +@opindex -fvartrace
> +Insert trace instructions to trace variable values at runtime.

Please elaborate on what "tracing" means.  And how a user is supposed to
get at a trace result.  Remember this is user documentation.

> +Requires enabling a backend specific option, like @option{-mptwrite} to enable
> +@code{PTWRITE} instruction generation on x86. @option{-fvartrace} traces
> +arguments, return values, pointer references and globals, but no locals.
> +
> +@item -fvartrace-args
> +@opindex -fvartrace-args
> +Trace arguments. Can be used independently or together with @option{-vartrace},
> +or as @option{-fno-vartrace-args} to disable.
> +
> +@item -fvartrace-returns
> +@opindex -fvartrace-returns
> +Trace return values.  Can be used independently or together with @option{-vartrace},
> +or as @option{-fno-vartrace-return} to disable.
> +
> +@item -fvartrace-reads
> +@opindex -fvartrace-reads
> +Trace reads.
> +
> +@item -fvartrace-writes
> +@opindex -fvartrace-writes
> +Trace writes.
> +
> +@item -fvartrace-locals
> +@opindex -fvartrace-locals
> +Insert code to trace local variables. This can have high overhead.
> +
>  @item -fvisibility-inlines-hidden
>  @opindex fvisibility-inlines-hidden
>  This switch declares that the user does not attempt to compare
> diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
> index f841527f971..6555cb122e9 100644
> --- a/gcc/doc/tm.texi
> +++ b/gcc/doc/tm.texi
> @@ -11933,6 +11933,10 @@ Address Sanitizer shadow memory address.  NULL if Address Sanitizer is not
>  supported by the target.
>  @end deftypefn
>
> +@deftypefn {Target Hook} tree TARGET_VARTRACE_FUNC (tree @var{type})
> +Return a builtin to call to trace variables or NULL if not supported by the target.

Please elaborate on the required signature of the builtin, its
arguments and semantics.
Is this really expected to be similar enough across architectures to make this a
middle-end feature rather than a target specific isntrumentation thing
in md-reorg or so?

> +@end deftypefn
> +
>  @deftypefn {Target Hook} {unsigned HOST_WIDE_INT} TARGET_MEMMODEL_CHECK (unsigned HOST_WIDE_INT @var{val})
>  Validate target specific memory model mask bits. When NULL no target specific
>  memory model bits are allowed.
> diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
> index 967ef3ad22f..7cce21bb26c 100644
> --- a/gcc/doc/tm.texi.in
> +++ b/gcc/doc/tm.texi.in
> @@ -8101,6 +8101,8 @@ and the associated definitions of those functions.
>
>  @hook TARGET_ASAN_SHADOW_OFFSET
>
> +@hook TARGET_VARTRACE_FUNC
> +
>  @hook TARGET_MEMMODEL_CHECK
>
>  @hook TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
> diff --git a/gcc/passes.def b/gcc/passes.def
> index 24f212c8e31..518cb4ef6f7 100644
> --- a/gcc/passes.def
> +++ b/gcc/passes.def
> @@ -179,6 +179,7 @@ along with GCC; see the file COPYING3.  If not see
>    NEXT_PASS (pass_oacc_device_lower);
>    NEXT_PASS (pass_omp_device_lower);
>    NEXT_PASS (pass_omp_target_link);
> +  NEXT_PASS (pass_vartrace);

Wow, that's early.  Reasoning for the placement before post-IPA optimizations?

>    NEXT_PASS (pass_all_optimizations);
>    PUSH_INSERT_PASSES_WITHIN (pass_all_optimizations)
>        NEXT_PASS (pass_remove_cgraph_callee_edges);
> diff --git a/gcc/target.def b/gcc/target.def
> index ad27d352ca4..db5d88efb95 100644
> --- a/gcc/target.def
> +++ b/gcc/target.def
> @@ -4300,6 +4300,13 @@ supported by the target.",
>   unsigned HOST_WIDE_INT, (void),
>   NULL)
>
> +/* Defines the builtin to trace variables, or NULL.  */
> +DEFHOOK
> +(vartrace_func,
> + "Return a builtin to call to trace variables or NULL if not supported by the target.",
> + tree, (tree type),
> + NULL)
> +
>  /* Functions relating to calls - argument passing, returns, etc.  */
>  /* Members of struct call have no special macro prefix.  */
>  HOOK_VECTOR (TARGET_CALLS, calls)
> diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
> index af15adc8e0c..2cf31785a6f 100644
> --- a/gcc/tree-pass.h
> +++ b/gcc/tree-pass.h
> @@ -423,6 +423,7 @@ extern gimple_opt_pass *make_pass_strlen (gcc::context *ctxt);
>  extern gimple_opt_pass *make_pass_fold_builtins (gcc::context *ctxt);
>  extern gimple_opt_pass *make_pass_post_ipa_warn (gcc::context *ctxt);
>  extern gimple_opt_pass *make_pass_stdarg (gcc::context *ctxt);
> +extern gimple_opt_pass *make_pass_vartrace (gcc::context *ctxt);
>  extern gimple_opt_pass *make_pass_early_warn_uninitialized (gcc::context *ctxt);
>  extern gimple_opt_pass *make_pass_late_warn_uninitialized (gcc::context *ctxt);
>  extern gimple_opt_pass *make_pass_cse_reciprocals (gcc::context *ctxt);
> diff --git a/gcc/tree-vartrace.c b/gcc/tree-vartrace.c
> new file mode 100644
> index 00000000000..07f5aa6bc8f
> --- /dev/null
> +++ b/gcc/tree-vartrace.c
> @@ -0,0 +1,463 @@
> +/* Insert instructions for data value tracing.
> +   Copyright (C) 2017 Free Software Foundation, Inc.

It's 2018 now

> +   Contributed by Andi Kleen.
> +
> +This file is part of GCC.
> +
> +GCC is free software; you can redistribute it and/or modify
> +it under the terms of the GNU General Public License as published by
> +the Free Software Foundation; either version 3, or (at your option)
> +any later version.
> +
> +GCC is distributed in the hope that it will be useful,
> +but WITHOUT ANY WARRANTY; without even the implied warranty of
> +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +GNU General Public License for more details.
> +
> +You should have received a copy of the GNU General Public License
> +along with GCC; see the file COPYING3.  If not see
> +<http://www.gnu.org/licenses/>.  */
> +
> +#include "config.h"
> +#include "system.h"
> +#include "coretypes.h"
> +#include "backend.h"
> +#include "target.h"
> +#include "tree.h"
> +#include "tree-iterator.h"
> +#include "tree-pass.h"
> +#include "basic-block.h"
> +#include "gimple.h"
> +#include "gimple-iterator.h"
> +#include "gimplify.h"
> +#include "gimplify-me.h"
> +#include "gimple-ssa.h"
> +#include "gimple-pretty-print.h"
> +#include "cfghooks.h"
> +#include "ssa.h"
> +#include "tree-dfa.h"
> +#include "attribs.h"
> +
> +enum attrstate { force_off, force_on, neutral };

Please put most of the private stuff into an anonymous namespace.

> +/* Can we trace with attributes ATTR.  */
> +
> +static attrstate supported_attr (tree attr)
> +{
> +  if (lookup_attribute ("no_vartrace", attr))
> +    return force_off;
> +  if (lookup_attribute ("vartrace", attr))
> +    return force_on;
> +  return neutral;
> +}
> +
> +/* Is ARG supported considering S, handling both decls and other trees.  */
> +
> +static attrstate supported_op (tree arg, attrstate s)
> +{
> +  if (s != neutral)
> +    return s;
> +  if (DECL_P (arg))
> +    {
> +      s = supported_attr (DECL_ATTRIBUTES (arg));
> +      if (s != neutral)
> +       return s;
> +    }
> +  return supported_attr (TYPE_ATTRIBUTES (TREE_TYPE (arg)));
> +}
> +
> +/* Can we trace T.  */
> +
> +static attrstate supported_type (tree t)
> +{
> +  tree type = TREE_TYPE (t);
> +
> +  if (!POINTER_TYPE_P (type) && !INTEGRAL_TYPE_P (type))
> +    return force_off;
> +  enum attrstate s = supported_op (t, neutral);
> +  if (TREE_CODE (t) == COMPONENT_REF
> +          || TREE_CODE (t) == ARRAY_REF)

indenting seems off.  I think you want if (handled_component_p (t))
instead for the recursion to TREE_OPERAND (t, 0).  You also
want to finally recurse into a [TARGET_]MEM_REF operand zero.

You then also want to handle if t is &a and recurse to a.  You may
see

   MEM[&a + 4].c.d.e

here.  Eventually you simply want to use for_each_index I guess?

> +    {
> +      s = supported_op (TREE_OPERAND (t, 0), s);
> +      s = supported_op (TREE_OPERAND (t, 1), s);
> +    }
> +  return s;
> +}
> +
> +/* Can we trace T, or if FORCE is set.  */
> +
> +static bool supported_type_or_force (tree t, bool force)
> +{
> +  enum attrstate s = supported_type (t);
> +  if (s == neutral)
> +    return force;
> +  return s == force_off ? false : true;
> +}
> +
> +/* Return true if T refering to a local variable.
> +   ?? better ways to do this?  */

auto_var_in_fn_p (t, cfun)

if you mean a variable with automatic storage duration
(stack or register).

> +static bool is_local (tree t)
> +{
> +  // Add another attribute to override?
> +  if (!flag_vartrace_locals)
> +    return false;
> +  if (TREE_STATIC (t))
> +    return false;
> +  if (TREE_CODE (t) == VAR_DECL && DECL_EXTERNAL (t))
> +    return false;
> +  return true;
> +}
> +
> +/* Is T something we can log, FORCEing the type if needed.  */
> +
> +static bool supported_mem (tree t, bool force)
> +{
> +  enum attrstate s = supported_type (t);
> +
> +  if (s == force_off)
> +    return false;
> +
> +  switch (TREE_CODE (t))
> +    {
> +    case VAR_DECL:
> +      if (DECL_ARTIFICIAL (t))
> +       return false;
> +      if (is_local (t))
> +       return true;
> +      return s == force_on || force;
> +
> +    case ARRAY_REF:
> +    case COMPONENT_REF:
> +      t = TREE_OPERAND (t, 0);
> +      if (is_local (t))
> +       return true;

What about a.b.c?

> +      return s == force_on || force;
> +
> +    case TARGET_MEM_REF:
> +    case MEM_REF:
> +      // could use points-to to check for locals?

There can be non-pointers wrapped here as &x.  I suggest
to combine supported_type and this into one beast since
you need to recurse.

> +      return true;
> +
> +    case SSA_NAME:
> +      if (flag_vartrace_locals && is_gimple_reg (t))
> +       return true;

is_gimple_reg is always true for SSA_NAMEs.  If you
want to only track things that appear in debug info
you probably want to do sth like

   if (flag_vartrace_locals
       && SSA_NAME_VAR (t)
       && !DECL_IGNORED (SSA_NAME_VAR (t)))
     return true;

> +      break;
> +
> +    default:
> +      break;
> +    }
> +
> +  return false;
> +}
> +
> +/* Print debugging for inserting CALL at ORIG_STMT with type of VAL.  */
> +
> +static void log_trace_code (gimple *orig_stmt, gimple *code,
> +                           tree val)

Just noticing here but coding convention says it should be

static void
log_trace_code (...

> +{
> +  if (dump_file)
> +    {
> +      if (orig_stmt)
> +       fprintf (dump_file, "BB%d ", gimple_bb (orig_stmt)->index);
> +      fprintf (dump_file, "inserting ");
> +      print_gimple_stmt (dump_file, code, 0, TDF_VOPS|TDF_MEMSYMS);
> +      if (orig_stmt)
> +       {
> +         fprintf (dump_file, "orig ");
> +         print_gimple_stmt (dump_file, orig_stmt, 2,
> +                            TDF_VOPS|TDF_MEMSYMS);
> +       }
> +      fprintf (dump_file, "type ");
> +      print_generic_expr (dump_file, TREE_TYPE (val), TDF_SLIM);
> +      fputc ('\n', dump_file);
> +      fputc ('\n', dump_file);
> +    }
> +}
> +
> +/* Insert variable tracing code for VAL before iterator GI, originally
> +   for ORIG_STMT.  Return trace variable or NULL.  */
> +
> +static tree insert_trace (gimple_stmt_iterator *gi, tree val,
> +                         gimple *orig_stmt)
> +{
> +  tree func = targetm.vartrace_func (TREE_TYPE (val));
> +  if (!func)
> +    return NULL_TREE;
> +
> +  location_t loc = gimple_location (orig_stmt);
> +
> +  gimple_seq seq = NULL;
> +  tree tvar = make_ssa_name (TREE_TYPE (val));
> +  gassign *assign = gimple_build_assign (tvar, val);
> +  log_trace_code (orig_stmt, assign, val);
> +  gimple_set_location (assign, loc);
> +  gimple_seq_add_stmt (&seq, assign);

You can elide this copy if val is a register.

> +  gcall *call = gimple_build_call (func, 1, tvar);
> +  log_trace_code (NULL, call, tvar);
> +  gimple_set_location (call, loc);
> +  gimple_seq_add_stmt (&seq, call);

There doesn't seem to be any point in using a seq here,
just do gsi_insert_before (gi, assign/call, GSI_SAME_STMT)
twice

> +  gsi_insert_seq_before (gi, seq, GSI_SAME_STMT);
> +  return tvar;
> +}
> +
> +/* Insert trace at GI for T in FUN if suitable memory or variable reference.
> +   Always if FORCE. Originally on ORIG_STMT.  */
> +
> +tree instrument_mem (gimple_stmt_iterator *gi, tree t,
> +                    bool force,
> +                    gimple *orig_stmt)
> +{
> +  if (supported_mem (t, force))
> +    return insert_trace (gi, t, orig_stmt);
> +  return NULL_TREE;
> +}
> +
> +/* Instrument arguments for FUN considering FORCE. Return true if
> +   function has changed.  */
> +
> +bool instrument_args (function *fun, bool force)
> +{
> +  bool changed = false;
> +  gimple_stmt_iterator gi;
> +
> +  /* Local tracing usually takes care of the argument too, when
> +     they are read. This avoids redundant trace instructions.  */
> +  if (flag_vartrace_locals)
> +    return false;
> +
> +  for (tree arg = DECL_ARGUMENTS (current_function_decl);
> +       arg != NULL_TREE;
> +       arg = DECL_CHAIN (arg))
> +    {
> +     gi = gsi_start_bb (BASIC_BLOCK_FOR_FN (fun, NUM_FIXED_BLOCKS));
> +     if (supported_type_or_force (arg, force || flag_vartrace_args))
> +       {
> +         tree func = targetm.vartrace_func (TREE_TYPE (arg));
> +         if (!func)
> +           continue;
> +
> +         tree sarg = NULL;
> +         // ??? or force like sanopt?
> +         if (is_gimple_reg (arg))
> +           sarg = get_or_create_ssa_default_def (fun, arg);
> +         if (!sarg)
> +           continue;
> +
> +         if (has_zero_uses (sarg))
> +           continue;

I think you do not want to use get_or_create_ssa_default_def
but simply ssa_default_def () which will return NULL if there
isn't any.

> +
> +         gimple_seq seq = NULL;
> +         tree tvar = make_ssa_name (TREE_TYPE (sarg));
> +         gassign *assign = gimple_build_assign (tvar, sarg);
> +         gimple_set_location (assign, fun->function_start_locus);
> +         gimple_seq_add_stmt (&seq, assign);
> +
> +         gcall *call = gimple_build_call (func, 1, tvar);
> +         log_trace_code (NULL, call, tvar);
> +         gimple_set_location (call, fun->function_start_locus);
> +         gimple_seq_add_stmt (&seq, call);
> +
> +         edge edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (fun));
> +         gsi_insert_seq_on_edge_immediate (edge, seq);

Use gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (fun)) as
insertion point.  I think you can then use your regular instrumentation routine
if you pass it a location.

> +
> +         changed = true;
> +       }
> +    }
> +  return changed;
> +}
> +
> +/* Generate trace call for store STMT at GI, force if FORCE.  Return true
> +   if successfull. Modifies the original store to use a temporary.  */
> +
> +static bool instrument_store (gimple_stmt_iterator *gi, gimple *stmt, bool force)

Noticing here - if you know you are dealing with assigns use gassign *stmt

> +{
> +  if (!supported_mem (gimple_assign_lhs (stmt), force))
> +    return false;
> +
> +  tree orig_tgt = gimple_assign_lhs (stmt);
> +
> +  tree func = targetm.vartrace_func (TREE_TYPE (orig_tgt));
> +  if (!func)
> +    return false;
> +
> +  tree new_tgt = make_ssa_name(TREE_TYPE (orig_tgt));
> +  gimple_assign_set_lhs (stmt, new_tgt);
> +  update_stmt (stmt);
> +  log_trace_code (NULL, stmt, new_tgt);
> +
> +  gcall *tcall = gimple_build_call (func, 1, new_tgt);
> +  log_trace_code (stmt, tcall, new_tgt);
> +  gimple_set_location (tcall, gimple_location (stmt));
> +  gsi_insert_after (gi, tcall, GSI_CONTINUE_LINKING);
> +
> +  gassign *new_store = gimple_build_assign (orig_tgt, new_tgt);
> +  gimple_set_location (new_store, gimple_location (stmt));
> +  log_trace_code (NULL, new_store, new_tgt);
> +  gsi_insert_after (gi, new_store, GSI_CONTINUE_LINKING);

You wreck virtual SSA form here.  Why not keep the original store
in place and instead insert the call before it, using gimple_assign_rhs1 (stmt)
as the value -- since you seem to know the store is register typed the
rhs1 is either an SSA name or a constant (you probably do not want to
inttrument stores from constants)

> +  return true;
> +}
> +
> +/* Instrument STMT at GI. Force if FORCE. CHANGED is the previous changed
> +   state, which is also returned.  */
> +
> +bool instrument_assign (gimple_stmt_iterator *gi,
> +                       gimple *stmt, bool changed, bool force)
> +{
> +  gassign *gas = as_a <gassign *> (stmt);
> +  bool read_force = force || flag_vartrace_reads;
> +  tree t;
> +
> +  t = instrument_mem (gi, gimple_assign_rhs1 (gas),
> +                     read_force,
> +                     stmt);
> +  if (t)
> +    {
> +      gimple_assign_set_rhs1 (gas, t);

it's odd that you do this here, outside of the actual
instrumentation?

> +      changed = true;
> +    }
> +  if (gimple_num_ops (gas) > 2)
> +    {

memory operands can be only in rhs1 and lhs.  In fact
all memory operands in assignments are in assigns
that satisfy gimple_assign_single_p, so just check that
early and bail out otherwise.

> +      t = instrument_mem (gi, gimple_assign_rhs2 (gas),
> +                         read_force,
> +                         stmt);
> +      if (t)
> +       {
> +         gimple_assign_set_rhs2 (gas, t);
> +         changed = true;
> +       }
> +    }
> +  if (gimple_num_ops (gas) > 3)
> +    {
> +      t = instrument_mem (gi, gimple_assign_rhs3 (gas),
> +                         read_force,
> +                         stmt);
> +      if (t)
> +       {
> +         gimple_assign_set_rhs3 (gas, t);
> +         changed = true;
> +       }
> +      }
> +  if (gimple_num_ops (gas) > 4)
> +    gcc_unreachable ();
> +  if (gimple_store_p (stmt))
> +    changed |= instrument_store (gi, stmt, flag_vartrace_writes || force);

There is also gimple_load_p of course.

> +  if (changed)
> +    update_stmt (stmt);
> +  return changed;
> +}
> +
> +/* Instrument return in function FUN at statement STMT at GI, force if
> +   FORCE.  CHANGED is the changed flag, which is also returned.  */
> +
> +static bool instrument_return (function *fun,
> +                              gimple_stmt_iterator *gi,
> +                              gimple *stmt, bool changed,
> +                              bool force)
> +{
> +  tree restype = TREE_TYPE (TREE_TYPE (fun->decl));
> +  greturn *gret = as_a <greturn *> (stmt);
> +  tree rval = gimple_return_retval (gret);
> +
> +  /* Cannot handle complex C++ return values at this point, even
> +     if they would collapse to a valid trace type.  */
> +  if (rval
> +      && useless_type_conversion_p (restype, TREE_TYPE (rval))

You are probably confused by DECL_BY_REFERENCE on the return decl ;)

> +      && supported_type_or_force (rval, flag_vartrace_returns || force))
> +    {
> +      if (tree tvar = insert_trace (gi, rval, stmt))
> +       {
> +         changed = true;
> +         gimple_return_set_retval (gret, tvar);
> +         log_trace_code (NULL, gret, tvar);
> +         update_stmt (stmt);
> +       }
> +    }
> +
> +  return changed;
> +}
> +
> +/* Insert vartrace calls for FUN.  */
> +
> +static unsigned int vartrace_execute (function *fun)
> +{
> +  basic_block bb;
> +  gimple_stmt_iterator gi;
> +  bool force = flag_vartrace;
> +  bool changed;
> +
> +  if (lookup_attribute ("vartrace", TYPE_ATTRIBUTES (TREE_TYPE (fun->decl)))

checking on the type is odd I think.

> +      || lookup_attribute ("vartrace", DECL_ATTRIBUTES (fun->decl)))
> +    force = true;
> +
> +  changed = instrument_args (fun, force);
> +
> +  FOR_ALL_BB_FN (bb, fun)

FOR_EACH_BB_FN is enough

> +    for (gi = gsi_start_bb (bb); !gsi_end_p (gi); gsi_next (&gi))
> +      {
> +       gimple *stmt = gsi_stmt (gi);
> +       if (is_gimple_assign (stmt) && !gimple_clobber_p (stmt))
> +         changed = instrument_assign (&gi, stmt, changed, force);

this overwrites changed, did you want to use |= ?

> +       else if (gimple_code (stmt) == GIMPLE_RETURN)
> +         {
> +           changed = instrument_return (fun, &gi, stmt, changed, force);

Likewise.

> +           // must end basic block
> +           break;
> +         }
> +
> +       // instrument something else like CALL?

Call LHS and arguments may contain stores/loads but those would be
all of non-register type.

> +       // We assume everything interesting is in a GIMPLE_ASSIGN
> +      }
> +  return changed ? TODO_update_ssa : 0;

You shouldn't need this if you were not wrecking virtual SSA form ;)

> +}
> +
> +const pass_data pass_data_vartrace =
> +{
> +  GIMPLE_PASS, /* type */
> +  "vartrace", /* name */
> +  OPTGROUP_NONE, /* optinfo_flags */
> +  TV_NONE, /* tv_id */
> +  0, /* properties_required */
> +  0, /* properties_provided */
> +  0, /* properties_destroyed */
> +  0, /* todo_flags_start */
> +  0, /* todo_flags_finish */
> +};
> +
> +class pass_vartrace : public gimple_opt_pass
> +{
> +public:
> +  pass_vartrace (gcc::context *ctxt)
> +    : gimple_opt_pass (pass_data_vartrace, ctxt)
> +  {}
> +
> +  virtual opt_pass * clone ()
> +    {
> +      return new pass_vartrace (m_ctxt);
> +    }
> +
> +  virtual bool gate (function *fun)
> +    {
> +      // check if vartrace is supported in backend
> +      if (!targetm.vartrace_func ||

coding conventions say || goes to the next line.

> +         targetm.vartrace_func (integer_type_node) == NULL)
> +       return false;
> +
> +      if (lookup_attribute ("no_vartrace", TYPE_ATTRIBUTES (TREE_TYPE (fun->decl)))
> +         || lookup_attribute ("no_vartrace", DECL_ATTRIBUTES (fun->decl)))
> +       return false;
> +
> +      // need to run pass always to check for variable attributes
> +      return true;
> +    }
> +
> +  virtual unsigned int execute (function *f) { return vartrace_execute (f); }
> +};
> +
> +gimple_opt_pass *
> +make_pass_vartrace (gcc::context *ctxt)
> +{
> +  return new pass_vartrace (ctxt);
> +}
> --
> 2.19.1
>
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH 2/3] Add a pass to automatically add ptwrite instrumentation

Andi Kleen-5
Hi Richard,

On Fri, Nov 09, 2018 at 04:27:22PM +0100, Richard Biener wrote:
> > Passes bootstrap and test suite on x86_64-linux, also
> > bootstrapped and tested gcc itself with full -fvartrace
> > and -fvartrace-locals instrumentation.
>
> So how is this supposed to be used?  I guess in a
> edit-debug cycle and not for production code?

It can actually be used for production code.

When processor trace is disabled the PTWRITE
instructions acts as nops. So it's only increasing
the code foot print. Since the instrumentation
should only log values which are already computed
it normally doesn't cause any other code.

Even when it is enabled the primary overhead is the
additional memory bandwidth, since the CPU can
do the logging in parallel to other code. As long
as the instrumentation is not too excessive to generate
too much memory bandwidth, it might be actually
quite reasonable to even keep the logging on for
production code, and use it as a "flight recorder",
which is dumped on failures.

This would also be the model in gdb, once we have support
in it. You would run the program in the debugger
and it just logs the data to a memory buffer,
but when stopping the value history can be examined.

There's also some ongoing work to add (optional) support
for PT to Linux crash dumps, so eventually that will
work without having to always run the debugger.

Today it can be done by running perf in the background
to record the PT, however there the setup is a bit
more complicated.

The primary use case I was envisioning was to set
the attribute on some critical functions/structures/types
of interest and then have a very overhead logging
option for them (generally cheaper than
equivalent software instrumentation). And then
they automatically gets logged without the programmer
needing to add lots of instrumentation code to
catch every instance. So think of it as a
"hardware accelerated printf"

>
> What do you actually write with PTWRITE?  I suppose
> you need to keep a ID to something mapping somewhere
> so you can make sense of the perf records?

PTWRITE writes 32bit/64bit values. The CPU reports the
IP of PTWRITE in the log, either explicitely, or implicitely if branch
trace is enabled too. The IP can then be used to look up
the DWARF scope for that IP. Then the decoder
decodes the operand of PTWRITE and maps it back using
the dwarf information. So it all works using
existing debugger infrastructure, and a quite simple
instruction decoder.

I'll clarify that in the description.

>
> Few comments inline below, but I'm not sure if this
> whole thing is interesting for GCC (as opposed to being
> a instrumentation plugin)

I'm biased, but I think automatic data tracing is a very exciting
use case, so hopefully it can be considered for mainstream gcc.

> >                               handle_no_profile_instrument_function_attribute,
> >                               NULL },
> > @@ -767,6 +775,21 @@ handle_no_sanitize_undefined_attribute (tree *node, tree name, tree, int,
> >    return NULL_TREE;
> >  }
> >
> > +/* Handle "vartrace"/"no_vartrace" attributes; arguments as in
> > +   struct attribute_spec.handler.  */
> > +
> > +static tree
> > +handle_vartrace_attribute (tree *node, tree, tree, int flags,
> > +                          bool *)
> > +{
> > +  if (TYPE_P (*node) && !(flags & (int) ATTR_FLAG_TYPE_IN_PLACE))
> > +    *node = build_variant_type_copy (*node);
>
> I don't think you want the attribute on types.  As far as I understood your
> descriptions it should only be on variables and functions.

The idea was that it's possible to trace all instances of a type,
especially structure members. Otherwise it will be harder for
the programmer to hunt down every instance.

For example if I have a structure that is used over a program,
and one member gets the wrong value.

I can do then in the header:

struct foo {
        int member __attribute__(("vartrace"));
};

and then recompile the program. Every instance of writing to
member will then be automatically instrumented (assuming
the program stays type safe)

Makes sense?

[BTW I considered adding an address trace
too for pointer writes to hunt down the non type safe
instances and possibly some other use cases.
That might be possible follow on work]

> > +
> >  #undef TARGET_GIMPLIFY_VA_ARG_EXPR
> >  #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
> >
> > diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
> > index 1eca009e255..08286aa4591 100644
> > --- a/gcc/doc/extend.texi
> > +++ b/gcc/doc/extend.texi
> > @@ -3193,6 +3193,13 @@ the standard C library can be guaranteed not to throw an exception
> >  with the notable exceptions of @code{qsort} and @code{bsearch} that
> >  take function pointer arguments.
> >
> > +@item no_vartrace
> > +@cindex @code{no_vartrace} function or variable attribute
> > +Disable data tracing for the function or variable or structured field
> > +marked with this attribute. Applies to types. Currently implemented
> > +for x86 when the @option{ptwrite} target option is enabled for systems
> > +that support the @code{PTWRITE} instruction.
>
> How does it apply to types?

Same as it would apply to variables or functions.

So when the whole file or the whole function is traced instances
of the marked type will not be traced.

> > @@ -11933,6 +11933,10 @@ Address Sanitizer shadow memory address.  NULL if Address Sanitizer is not
> >  supported by the target.
> >  @end deftypefn
> >
> > +@deftypefn {Target Hook} tree TARGET_VARTRACE_FUNC (tree @var{type})
> > +Return a builtin to call to trace variables or NULL if not supported by the target.
>
> Please elaborate on the required signature of the builtin, its
> arguments and semantics.
> Is this really expected to be similar enough across architectures to make this a
> middle-end feature rather than a target specific isntrumentation thing
> in md-reorg or so?

I'm not aware of any other architecture having a PTWRITE equivalent today,
but I would assume if one adds one it would look similar. There
are already other architectures that have a Processor Trace equivalent,
like ARM and MIPS.

Yes could move it into config/i386, but I assumed the concept
was generic enough for the generic middle end?`

> >  @hook TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
> > diff --git a/gcc/passes.def b/gcc/passes.def
> > index 24f212c8e31..518cb4ef6f7 100644
> > --- a/gcc/passes.def
> > +++ b/gcc/passes.def
> > @@ -179,6 +179,7 @@ along with GCC; see the file COPYING3.  If not see
> >    NEXT_PASS (pass_oacc_device_lower);
> >    NEXT_PASS (pass_omp_device_lower);
> >    NEXT_PASS (pass_omp_target_link);
> > +  NEXT_PASS (pass_vartrace);
>
> Wow, that's early.  Reasoning for the placement before post-IPA optimizations?

I was hoping my instrumentation would be optimized too,
and also do the instrumentation nearer the original
code.  But yes perhaps it should be later and that might
help with the occasionally redundant PTWRITEs which
are generated today.

Any suggestions where it should be?

Thanks for the useful comments. I'll work on that and repost.

-Andi

Reply | Threaded
Open this post in threaded view
|

Re: [PATCH 2/3] Add a pass to automatically add ptwrite instrumentation

Martin Sebor-2
In reply to this post by Andi Kleen-3
On 11/04/2018 12:32 AM, Andi Kleen wrote:

> From: Andi Kleen <[hidden email]>
>
> Add a new pass to automatically instrument changes to variables
> with the new PTWRITE instruction on x86. PTWRITE writes a 4 or 8 byte
> field into an Processor Trace log, which allows log over head
> logging of informatin.
>
> This allows to reconstruct how values later, which can be useful for
> debugging or other analysis of the program behavior. With the compiler
> support this can be done with without having to manually add instrumentation
> to the code.
>
> Using dwarf information this can be later mapped back to the variables.
>
> There are new options to enable instrumentation for different types,
> and also a new attribute to control analysis fine grained per
> function or variable level. The attributes can be set on both
> the variable and the type level, and also on structure fields.
> This allows to enable tracing only for specific code in large
> programs.
>
> The pass is generic, but only the x86 backend enables the necessary
> hooks. When the backend enables the necessary hooks (with -mptwrite)
> there is an additional pass that looks through the code for
> attribute vartrace enabled functions or variables.
>
> The -fvartrace-locals options is experimental: it works, but it
> generates redundant ptwrites because the pass doesn't use
> the SSA information to minimize instrumentation. This could be optimized
> later.
>
> Currently the code can be tested with SDE, or on a Intel
> Gemini Lake system with a new enough Linux kernel (v4.10+)
> that supports PTWRITE for PT. Linux perf can be used to
> record the values
>
> perf record -e intel_pt/ptw=1,branch=0/ program
> perf script --itrace=crw -F +synth ...
>
> I have an experimential version of perf that can also use
> dwarf information to symbolize many[1] values back to their variable
> names. So far it is not in standard perf, but available at
>
> https://git.kernel.org/pub/scm/linux/kernel/git/ak/linux-misc.git/log/?h=perf/var-resolve-4
>
> It is currently not able to decode all variable locations to names,
> but a large subset.
>
> Longer term hopefully gdb will support this information too.
>
> The CPU can potentially generate very data high bandwidths when
> code doing a lot of computation is heavily instrumented.
> This can cause some data loss in both the CPU and also in perf
> logging the data when the disk cannot keep up.
>
> Running some larger workloads most workloads do not cause
> CPU level overflows, but I've seen it with -fvartrace
> with crafty, and with more workloads with -fvartrace-locals.
>
> Recommendation is to not fully instrument programs,
> but only areas of interest either at the file level or using
> the attributes.
>
> The other thing is that perf and the disk often cannot keep up
> with the data bandwidth for longer computations. In this case
> it's possible to use perf snapshot mode (add --snapshot
> to the command line above). The data will be only logged to
> a memory ring buffer then, and only dump the buffers on events
> of interest by sending SIGUSR2 to the perf binrary.
>
> In the future this will be hopefully better supported with
> core files and gdb.
>
> Passes bootstrap and test suite on x86_64-linux, also
> bootstrapped and tested gcc itself with full -fvartrace
> and -fvartrace-locals instrumentation.

(I initially meant to just suggest detecting and rejecting the two
mutually exclusive attributes but as I read the rest of the patch
to better understand what it's about I noticed a few other issues
I thought would be useful to point out.)

...

> diff --git a/gcc/c-family/c-attribs.c b/gcc/c-family/c-attribs.c
> index 4416b5042f7..66bbd87921f 100644
> --- a/gcc/c-family/c-attribs.c
> +++ b/gcc/c-family/c-attribs.c
> @@ -325,6 +327,12 @@ const struct attribute_spec c_common_attribute_table[] =
>    { "no_instrument_function", 0, 0, true,  false, false, false,
>        handle_no_instrument_function_attribute,
>        NULL },
> +  { "vartrace",      0, 0, false,  false, false, false,
> +      handle_vartrace_attribute,
> +      NULL },
> +  { "no_vartrace",      0, 0, false,  false, false, false,
> +      handle_vartrace_attribute,
> +      NULL },
>    { "no_profile_instrument_function",  0, 0, true, false, false, false,
>        handle_no_profile_instrument_function_attribute,
>        NULL },

Unless mixing these attributes on the same declaration makes sense
I would suggest to either define the exclusions that should be
automatically applied to the attributes (see attribute exclusions),
or to enforce them in the handler.  Judging only by the names it
looks to me like vartrace should be mutually exclusive with
no_vartrace.

> @@ -767,6 +775,21 @@ handle_no_sanitize_undefined_attribute (tree *node, tree name, tree, int,
>    return NULL_TREE;
>  }
>
> +/* Handle "vartrace"/"no_vartrace" attributes; arguments as in
> +   struct attribute_spec.handler.  */
> +
> +static tree
> +handle_vartrace_attribute (tree *node, tree, tree, int flags,
> +   bool *)
> +{
> +  if (TYPE_P (*node) && !(flags & (int) ATTR_FLAG_TYPE_IN_PLACE))
> +    *node = build_variant_type_copy (*node);
> +
> +  /* Can apply to types, functions, variables.  */

I suspect the attribute shouldn't be applied to LABEL_DECLs but
the code suggests it's accepted there.  Does the patch reject it
on labels?  (I think it should with at least a warning.)

Similarly, does it make sense to apply the attribute to CONST_DECLs
such as enumerators?  If not, then I would suggest to give a warning
for those as well, mentioning the kind of symbols it either does or
doesn't apply to.  Ditto there any other DECLs that it doesn't apply
to.

Also, if I understand things correctly, it seems that the handler
would benefit from making use of TARGET_VARTRACE_FUNC() to give
a warning when a target doesn't support tracing at all, and
perhaps also a different warning when it does but not for variables
of the given type/precision, etc.  This might need to be controlled
by a different option than -Wattributes.

> +  /* We lookup it up later with lookup_attribute.  */
> +  return NULL_TREE;
> +}
> +
>  /* Handle an "asan odr indicator" attribute; arguments as in
>     struct attribute_spec.handler.  */
>
> diff --git a/gcc/common.opt b/gcc/common.opt
> index 2971dc21b1f..930acf40588 100644
> --- a/gcc/common.opt
> +++ b/gcc/common.opt
> @@ -2811,6 +2811,30 @@ ftree-scev-cprop
>  Common Report Var(flag_tree_scev_cprop) Init(1) Optimization
>  Enable copy propagation of scalar-evolution information.
>
> +fvartrace
> +Common Report Var(flag_vartrace)
> +Generate all variable tracking instrumentations, except for locals.
> +
> +fvartrace-returns
> +Common Report Var(flag_vartrace_returns)
> +Generate variable tracking instructions for function returns.
> +
> +fvartrace-args
> +Common Report Var(flag_vartrace_args)
> +Generate variable tracking instructions for function arguments.
> +
> +fvartrace-reads
> +Common Report Var(flag_vartrace_reads)
> +Generate variable tracking instructions for reads.
> +
> +fvartrace-writes
> +Common Report Var(flag_vartrace_writes)
> +Generate variable tracking instructions for writes.
> +
> +fvartrace-locals
> +Common Report Var(flag_vartrace_locals)
> +Generate variable tracking instructions for locals.
> +
>  ; -fverbose-asm causes extra commentary information to be produced in
>  ; the generated assembly code (to make it more readable).  This option
>  ; is generally only of use to those who actually need to read the
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index 490bb6292a8..4337121c239 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -31873,6 +31873,19 @@ ix86_mangle_function_version_assembler_name (tree decl, tree id)
>  }
>
>
> +static tree
> +ix86_vartrace_func (tree type)
> +{
> +  if (!(ix86_isa_flags2 & OPTION_MASK_ISA_PTWRITE))
> +    return NULL;
> +  if (TYPE_PRECISION (type) == 32)
> +    return ix86_builtins [(int) IX86_BUILTIN_PTWRITE32];
> +  else if (TYPE_PRECISION (type) == 64)
> +    return ix86_builtins [(int) IX86_BUILTIN_PTWRITE64];
> +  else
> +    return NULL;
> +}

Do I understand correctly that the tracing is limited to variables
of 32-bit and 64-bit scalar types?  If so, shouldn't the attribute
handler detect when the attribute is applied to variables of other
types/sizes/precisions and give a warning that tracing is not
supported there?  (I think it should though portability to other
targets with support for other types/sizes might need to be
considered.)  I would suggest to certainly document this limitation
for each target.

> +
>  static tree
>  ix86_mangle_decl_assembler_name (tree decl, tree id)
>  {
> @@ -50849,6 +50862,9 @@ ix86_run_selftests (void)
>  #undef TARGET_ASAN_SHADOW_OFFSET
>  #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
>
> +#undef TARGET_VARTRACE_FUNC
> +#define TARGET_VARTRACE_FUNC ix86_vartrace_func
> +
>  #undef TARGET_GIMPLIFY_VA_ARG_EXPR
>  #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
>
> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
> index 1eca009e255..08286aa4591 100644
> --- a/gcc/doc/extend.texi
> +++ b/gcc/doc/extend.texi
> @@ -3193,6 +3193,13 @@ the standard C library can be guaranteed not to throw an exception
>  with the notable exceptions of @code{qsort} and @code{bsearch} that
>  take function pointer arguments.
>
> +@item no_vartrace
> +@cindex @code{no_vartrace} function or variable attribute

If it also applies to types (as stated below) it's not just
a "function or variable attribute."  I think conventionally
this text describes the kind of attribute that's discussed in
the subsequent paragraphs, so it should probably read "function
attribute" for a function attribute and "variable attribute" for
a variable attribute, etc.

> +Disable data tracing for the function or variable or structured field

Typo: "structure field" or "structure member" (not structured).

> +marked with this attribute. Applies to types. Currently implemented
> +for x86 when the @option{ptwrite} target option is enabled for systems
> +that support the @code{PTWRITE} instruction.

I would suggest to use full sentences in the manual (as in other
entries in this section).  Such as:

   The @code{no_vartrace} attribute disables data tracing for
   the function [or variable or structure field] declared with
   the attribute.  The attribute is currently implemented for
   x86 when the @option{ptwrite} target option is enabled for
   systems that support the @code{PTWRITE} instruction.  See
   @pxref{Common Variable Attributes} and @pxref{Common Type
   Attributes}.

There is no description of the effect of the attribute on
a function.  That seems important to discuss.

Since the attribute applies to types it should also be listed
in the Common Type Attributes section (if it isn't -- I only
see it mentioned twice in the patch) and its effects on types
explained there.  (On further thought, if the sentence "Applies
to types." means something else than that it can also be applied
to type definitions it should be clarified.)

> +
>  @item optimize (@var{level}, @dots{})
>  @item optimize (@var{string}, @dots{})
>  @cindex @code{optimize} function attribute
> @@ -3454,6 +3461,12 @@ When applied to a member function of a C++ class template, the
>  attribute also means that the function is instantiated if the
>  class itself is instantiated.
>
> +@item vartrace
> +@cindex @code{vartrace} function or variable attribute
> +Enable data tracing for the function or variable or structure field
> +marked with this attribute. Applies to types. Will not trace locals,
> +but arguments, returns, globals, pointer references.

Same as above.  I'm guessing this text describes the effects of
the attribute on a function.  It feels like it could use quite
a bit more detail.  For instance, how do the function attribute
interact with type or variable attributes specified on objects
accessed by the function.  What is the effect on functions
inlined into a caller declared with the attribute?

> +
>  @item visibility ("@var{visibility_type}")
>  @cindex @code{visibility} function attribute
>  This attribute affects the linkage of the declaration to which it is attached.
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index cb5bc7bafc5..2f10b3c1023 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -2720,6 +2720,35 @@ Don't use the @code{__cxa_get_exception_ptr} runtime routine.  This
>  causes @code{std::uncaught_exception} to be incorrect, but is necessary
>  if the runtime routine is not available.
>
> +@item -fvartrace
> +@opindex -fvartrace
> +Insert trace instructions to trace variable values at runtime.
> +Requires enabling a backend specific option, like @option{-mptwrite} to enable
> +@code{PTWRITE} instruction generation on x86. @option{-fvartrace} traces
> +arguments, return values, pointer references and globals, but no locals.

Not to be too pedantic but presumably it traces also static locals.
If so, suggest to refer to "objects with static storage duration"
instead and perhaps also "objects with thread storage duration" if
it applies to those as well.

> +
> +@item -fvartrace-args
> +@opindex -fvartrace-args
> +Trace arguments. Can be used independently or together with @option{-vartrace},
> +or as @option{-fno-vartrace-args} to disable.
> +
> +@item -fvartrace-returns
> +@opindex -fvartrace-returns
> +Trace return values.  Can be used independently or together with @option{-vartrace},
> +or as @option{-fno-vartrace-return} to disable.
> +
> +@item -fvartrace-reads
> +@opindex -fvartrace-reads
> +Trace reads.
> +
> +@item -fvartrace-writes
> +@opindex -fvartrace-writes
> +Trace writes.
> +
> +@item -fvartrace-locals
> +@opindex -fvartrace-locals
> +Insert code to trace local variables. This can have high overhead.
> +
>  @item -fvisibility-inlines-hidden
>  @opindex fvisibility-inlines-hidden
>  This switch declares that the user does not attempt to compare
> diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
> index f841527f971..6555cb122e9 100644
> --- a/gcc/doc/tm.texi
> +++ b/gcc/doc/tm.texi
> @@ -11933,6 +11933,10 @@ Address Sanitizer shadow memory address.  NULL if Address Sanitizer is not
>  supported by the target.
>  @end deftypefn
>
> +@deftypefn {Target Hook} tree TARGET_VARTRACE_FUNC (tree @var{type})
> +Return a builtin to call to trace variables or NULL if not supported by the target.
> +@end deftypefn

So (IIUC) the hook returns true iff the target supports tracking
of objects of the given type, correct?  (If so, the documentation
should state that.  As it is, it doesn't mention type at all.)

> +
>  @deftypefn {Target Hook} {unsigned HOST_WIDE_INT} TARGET_MEMMODEL_CHECK (unsigned HOST_WIDE_INT @var{val})
>  Validate target specific memory model mask bits. When NULL no target specific
>  memory model bits are allowed.
> diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
> index 967ef3ad22f..7cce21bb26c 100644
> --- a/gcc/doc/tm.texi.in
> +++ b/gcc/doc/tm.texi.in
> @@ -8101,6 +8101,8 @@ and the associated definitions of those functions.
>
>  @hook TARGET_ASAN_SHADOW_OFFSET
>
> +@hook TARGET_VARTRACE_FUNC
> +
>  @hook TARGET_MEMMODEL_CHECK
>
>  @hook TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
> diff --git a/gcc/passes.def b/gcc/passes.def
> index 24f212c8e31..518cb4ef6f7 100644
> --- a/gcc/passes.def
> +++ b/gcc/passes.def
> @@ -179,6 +179,7 @@ along with GCC; see the file COPYING3.  If not see
>    NEXT_PASS (pass_oacc_device_lower);
>    NEXT_PASS (pass_omp_device_lower);
>    NEXT_PASS (pass_omp_target_link);
> +  NEXT_PASS (pass_vartrace);
>    NEXT_PASS (pass_all_optimizations);
>    PUSH_INSERT_PASSES_WITHIN (pass_all_optimizations)
>        NEXT_PASS (pass_remove_cgraph_callee_edges);
> diff --git a/gcc/target.def b/gcc/target.def
> index ad27d352ca4..db5d88efb95 100644
> --- a/gcc/target.def
> +++ b/gcc/target.def
> @@ -4300,6 +4300,13 @@ supported by the target.",
>   unsigned HOST_WIDE_INT, (void),
>   NULL)
>
> +/* Defines the builtin to trace variables, or NULL.  */
> +DEFHOOK
> +(vartrace_func,
> + "Return a builtin to call to trace variables or NULL if not supported by the target.",
> + tree, (tree type),

Same as above (how is TYPE used?)

> + NULL)
> +
>  /* Functions relating to calls - argument passing, returns, etc.  */
>  /* Members of struct call have no special macro prefix.  */
>  HOOK_VECTOR (TARGET_CALLS, calls)
> diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
> index af15adc8e0c..2cf31785a6f 100644
> --- a/gcc/tree-pass.h
> +++ b/gcc/tree-pass.h
> @@ -423,6 +423,7 @@ extern gimple_opt_pass *make_pass_strlen (gcc::context *ctxt);
>  extern gimple_opt_pass *make_pass_fold_builtins (gcc::context *ctxt);
>  extern gimple_opt_pass *make_pass_post_ipa_warn (gcc::context *ctxt);
>  extern gimple_opt_pass *make_pass_stdarg (gcc::context *ctxt);
> +extern gimple_opt_pass *make_pass_vartrace (gcc::context *ctxt);
>  extern gimple_opt_pass *make_pass_early_warn_uninitialized (gcc::context *ctxt);
>  extern gimple_opt_pass *make_pass_late_warn_uninitialized (gcc::context *ctxt);
>  extern gimple_opt_pass *make_pass_cse_reciprocals (gcc::context *ctxt);
> diff --git a/gcc/tree-vartrace.c b/gcc/tree-vartrace.c
> new file mode 100644
> index 00000000000..07f5aa6bc8f
> --- /dev/null
> +++ b/gcc/tree-vartrace.c
> @@ -0,0 +1,463 @@
> +/* Insert instructions for data value tracing.
> +   Copyright (C) 2017 Free Software Foundation, Inc.
> +   Contributed by Andi Kleen.
> +
> +This file is part of GCC.
> +
> +GCC is free software; you can redistribute it and/or modify
> +it under the terms of the GNU General Public License as published by
> +the Free Software Foundation; either version 3, or (at your option)
> +any later version.
> +
> +GCC is distributed in the hope that it will be useful,
> +but WITHOUT ANY WARRANTY; without even the implied warranty of
> +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +GNU General Public License for more details.
> +
> +You should have received a copy of the GNU General Public License
> +along with GCC; see the file COPYING3.  If not see
> +<http://www.gnu.org/licenses/>.  */
> +
> +#include "config.h"
> +#include "system.h"
> +#include "coretypes.h"
> +#include "backend.h"
> +#include "target.h"
> +#include "tree.h"
> +#include "tree-iterator.h"
> +#include "tree-pass.h"
> +#include "basic-block.h"
> +#include "gimple.h"
> +#include "gimple-iterator.h"
> +#include "gimplify.h"
> +#include "gimplify-me.h"
> +#include "gimple-ssa.h"
> +#include "gimple-pretty-print.h"
> +#include "cfghooks.h"
> +#include "ssa.h"
> +#include "tree-dfa.h"
> +#include "attribs.h"
> +
> +enum attrstate { force_off, force_on, neutral };
> +
> +/* Can we trace with attributes ATTR.  */
> +
> +static attrstate supported_attr (tree attr)
> +{
> +  if (lookup_attribute ("no_vartrace", attr))
> +    return force_off;
> +  if (lookup_attribute ("vartrace", attr))
> +    return force_on;

Does this imply that no_vartrace overrides vartrace on the same
declaration?  Unless that's the intended design (in which case
I would expect to see it mentioned in the manual and tested)
I think it's preferable to have the attribute handler treat
these consistently with similar either/or attributes such as
cold/hot, or noreturn/returns_nonnull, etc. (i.e., warn and
drop the conflicting one that's being added so back ends
don't have to worry about ambiguous attribute combinations).

> +  return neutral;
> +}
> +
> +/* Is ARG supported considering S, handling both decls and other trees.  */
> +
> +static attrstate supported_op (tree arg, attrstate s)
> +{
> +  if (s != neutral)
> +    return s;
> +  if (DECL_P (arg))
> +    {
> +      s = supported_attr (DECL_ATTRIBUTES (arg));
> +      if (s != neutral)
> + return s;
> +    }
> +  return supported_attr (TYPE_ATTRIBUTES (TREE_TYPE (arg)));
> +}
> +
> +/* Can we trace T.  */
> +
> +static attrstate supported_type (tree t)
> +{
> +  tree type = TREE_TYPE (t);
> +
> +  if (!POINTER_TYPE_P (type) && !INTEGRAL_TYPE_P (type))
> +    return force_off;

This looks like tracing is supported only for integers and
pointers but not other types (such as floats).  If I'm reading
that correctly then this restriction too should be exposed to
the attribute handler so meaningless uses of the attribute could
be detected (again, perhaps under an option other than
-Wattributes).

> +  enum attrstate s = supported_op (t, neutral);
> +  if (TREE_CODE (t) == COMPONENT_REF
> +   || TREE_CODE (t) == ARRAY_REF)
> +    {
> +      s = supported_op (TREE_OPERAND (t, 0), s);
> +      s = supported_op (TREE_OPERAND (t, 1), s);

This may be a naive question but what will this code do for:

   extern __attribute__((no_vartrace)) int a[];
   extern __attribute__((vartrace)) int idx;

   int f (void) { return a[idx]; }

Will it trace the read of IDX?  (I would expect it to but I'm
not sure the code will have that effect.)  A more detailed
comment might help.

> +    }
> +  return s;
> +}
> +
> +/* Can we trace T, or if FORCE is set.  */
> +
> +static bool supported_type_or_force (tree t, bool force)
> +{
> +  enum attrstate s = supported_type (t);
> +  if (s == neutral)
> +    return force;
> +  return s == force_off ? false : true;
> +}
> +
> +/* Return true if T refering to a local variable.

Typo: "Returns true if T refers to a local variable."

Though since T is assumed to be a DECL then naming it decl would
make that obvious at the first sight.  (I would suggest to use
that naming convention for all such function arguments in this
file.)

I did not look at the rest of the patch.

Thanks
Martin
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH 1/3] Add PTWRITE builtins for x86

Segher Boessenkool
In reply to this post by Uros Bizjak-3
On Thu, Nov 08, 2018 at 06:30:21PM +0100, Uros Bizjak wrote:
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/ptwrite2.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mptwrite " } */
> +/* { dg-final { scan-assembler "ptwrite.*r" } } */
> +/* { dg-final { scan-assembler "ptwrite.*e" } } */
>
> Better use \[^\n\r\] instead of .* to avoid unwanted multi-line matches.

Or better, write it as

/* { dg-final { scan-assembler {(?n)ptwrite.*r} } } */


Segher
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH 2/3] Add a pass to automatically add ptwrite instrumentation

Richard Biener-2
In reply to this post by Andi Kleen-5
On Fri, Nov 9, 2018 at 7:18 PM Andi Kleen <[hidden email]> wrote:

>
> Hi Richard,
>
> On Fri, Nov 09, 2018 at 04:27:22PM +0100, Richard Biener wrote:
> > > Passes bootstrap and test suite on x86_64-linux, also
> > > bootstrapped and tested gcc itself with full -fvartrace
> > > and -fvartrace-locals instrumentation.
> >
> > So how is this supposed to be used?  I guess in a
> > edit-debug cycle and not for production code?
>
> It can actually be used for production code.
>
> When processor trace is disabled the PTWRITE
> instructions acts as nops. So it's only increasing
> the code foot print. Since the instrumentation
> should only log values which are already computed
> it normally doesn't cause any other code.
>
> Even when it is enabled the primary overhead is the
> additional memory bandwidth, since the CPU can
> do the logging in parallel to other code. As long
> as the instrumentation is not too excessive to generate
> too much memory bandwidth, it might be actually
> quite reasonable to even keep the logging on for
> production code, and use it as a "flight recorder",
> which is dumped on failures.

I see.

> This would also be the model in gdb, once we have support
> in it. You would run the program in the debugger
> and it just logs the data to a memory buffer,
> but when stopping the value history can be examined.

Hmm, so the debugger still needs to relate the ptwrite
instruction with the actual variable the data is for.  I suppose
practically this means that var-tracking needs to be able to
compute a location list for a variable that happens to overlap
with the stored value?

That is, usually debuggers look for a location list of a variable
and find, say, %rax.  But for ptwrite the debugger needs to
examine all active location lists for, say, %rax and figure out
that it contains the value for variable 'a'?

When there isn't any such relation between the ptwrite stored
value and any variable the ptwrite is useless, right?

> There's also some ongoing work to add (optional) support
> for PT to Linux crash dumps, so eventually that will
> work without having to always run the debugger.
>
> Today it can be done by running perf in the background
> to record the PT, however there the setup is a bit
> more complicated.
>
> The primary use case I was envisioning was to set
> the attribute on some critical functions/structures/types
> of interest and then have a very overhead logging
> option for them (generally cheaper than
> equivalent software instrumentation). And then
> they automatically gets logged without the programmer
> needing to add lots of instrumentation code to
> catch every instance. So think of it as a
> "hardware accelerated printf"
>
> >
> > What do you actually write with PTWRITE?  I suppose
> > you need to keep a ID to something mapping somewhere
> > so you can make sense of the perf records?
>
> PTWRITE writes 32bit/64bit values. The CPU reports the
> IP of PTWRITE in the log, either explicitely, or implicitely if branch
> trace is enabled too. The IP can then be used to look up
> the DWARF scope for that IP. Then the decoder
> decodes the operand of PTWRITE and maps it back using
> the dwarf information. So it all works using
> existing debugger infrastructure, and a quite simple
> instruction decoder.
>
> I'll clarify that in the description.
>
> >
> > Few comments inline below, but I'm not sure if this
> > whole thing is interesting for GCC (as opposed to being
> > a instrumentation plugin)
>
> I'm biased, but I think automatic data tracing is a very exciting
> use case, so hopefully it can be considered for mainstream gcc.
>
> > >                               handle_no_profile_instrument_function_attribute,
> > >                               NULL },
> > > @@ -767,6 +775,21 @@ handle_no_sanitize_undefined_attribute (tree *node, tree name, tree, int,
> > >    return NULL_TREE;
> > >  }
> > >
> > > +/* Handle "vartrace"/"no_vartrace" attributes; arguments as in
> > > +   struct attribute_spec.handler.  */
> > > +
> > > +static tree
> > > +handle_vartrace_attribute (tree *node, tree, tree, int flags,
> > > +                          bool *)
> > > +{
> > > +  if (TYPE_P (*node) && !(flags & (int) ATTR_FLAG_TYPE_IN_PLACE))
> > > +    *node = build_variant_type_copy (*node);
> >
> > I don't think you want the attribute on types.  As far as I understood your
> > descriptions it should only be on variables and functions.
>
> The idea was that it's possible to trace all instances of a type,
> especially structure members. Otherwise it will be harder for
> the programmer to hunt down every instance.
>
> For example if I have a structure that is used over a program,
> and one member gets the wrong value.
>
> I can do then in the header:
>
> struct foo {
>         int member __attribute__(("vartrace"));
> };
>
> and then recompile the program. Every instance of writing to
> member will then be automatically instrumented (assuming
> the program stays type safe)
>
> Makes sense?

OK.  The user documentation should be more elaborate here.

> [BTW I considered adding an address trace
> too for pointer writes to hunt down the non type safe
> instances and possibly some other use cases.
> That might be possible follow on work]
>
> > > +
> > >  #undef TARGET_GIMPLIFY_VA_ARG_EXPR
> > >  #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
> > >
> > > diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
> > > index 1eca009e255..08286aa4591 100644
> > > --- a/gcc/doc/extend.texi
> > > +++ b/gcc/doc/extend.texi
> > > @@ -3193,6 +3193,13 @@ the standard C library can be guaranteed not to throw an exception
> > >  with the notable exceptions of @code{qsort} and @code{bsearch} that
> > >  take function pointer arguments.
> > >
> > > +@item no_vartrace
> > > +@cindex @code{no_vartrace} function or variable attribute
> > > +Disable data tracing for the function or variable or structured field
> > > +marked with this attribute. Applies to types. Currently implemented
> > > +for x86 when the @option{ptwrite} target option is enabled for systems
> > > +that support the @code{PTWRITE} instruction.
> >
> > How does it apply to types?
>
> Same as it would apply to variables or functions.
>
> So when the whole file or the whole function is traced instances
> of the marked type will not be traced.
>
> > > @@ -11933,6 +11933,10 @@ Address Sanitizer shadow memory address.  NULL if Address Sanitizer is not
> > >  supported by the target.
> > >  @end deftypefn
> > >
> > > +@deftypefn {Target Hook} tree TARGET_VARTRACE_FUNC (tree @var{type})
> > > +Return a builtin to call to trace variables or NULL if not supported by the target.
> >
> > Please elaborate on the required signature of the builtin, its
> > arguments and semantics.
> > Is this really expected to be similar enough across architectures to make this a
> > middle-end feature rather than a target specific isntrumentation thing
> > in md-reorg or so?
>
> I'm not aware of any other architecture having a PTWRITE equivalent today,
> but I would assume if one adds one it would look similar. There
> are already other architectures that have a Processor Trace equivalent,
> like ARM and MIPS.
>
> Yes could move it into config/i386, but I assumed the concept
> was generic enough for the generic middle end?`
>
> > >  @hook TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
> > > diff --git a/gcc/passes.def b/gcc/passes.def
> > > index 24f212c8e31..518cb4ef6f7 100644
> > > --- a/gcc/passes.def
> > > +++ b/gcc/passes.def
> > > @@ -179,6 +179,7 @@ along with GCC; see the file COPYING3.  If not see
> > >    NEXT_PASS (pass_oacc_device_lower);
> > >    NEXT_PASS (pass_omp_device_lower);
> > >    NEXT_PASS (pass_omp_target_link);
> > > +  NEXT_PASS (pass_vartrace);
> >
> > Wow, that's early.  Reasoning for the placement before post-IPA optimizations?
>
> I was hoping my instrumentation would be optimized too,
> and also do the instrumentation nearer the original
> code.  But yes perhaps it should be later and that might
> help with the occasionally redundant PTWRITEs which
> are generated today.
>
> Any suggestions where it should be?

I would have picked a location right before RTL expansion.

OTOH given the likely restriction with regarding to useful debug
info suggested above a place between var-tracking and late
debug generation seems best?  That also avoids register allocation
side-effects (the ptwrite is probably an UNSPEC?).  There's
conveniently the md-reorg pass in that area but if other targets
can do sth similar then a common pass working might be good
as well.

I hope you don't mind if this eventually slips to GCC 10 given
as you say there is no HW available right now.  (still waiting
for a CPU with CET ...)

Thanks,
Richard.

> Thanks for the useful comments. I'll work on that and repost.
>
> -Andi
>
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH 2/3] Add a pass to automatically add ptwrite instrumentation

Martin Sebor-2
In reply to this post by Andi Kleen-3
One other high-level comment: a more powerful interface to
variable tracing than annotating declarations in the source
would be to provide either the names of the symbols to trace
on the command line or in an external file.  That way tracing
could be enabled for objects and types declared in read-only
files (such as system headers), and would let the user more
easily experiment with annotations.

This could be in addition to the attributes, and would require
coming up with a way of identifying symbols with internal or
no linkage, such as local variables, and perhaps also function
arguments, return values, etc., if this mechanisms were to
provide access to those as well (I think it would be fine if
this "external" mechanism provided support to only a subset
of symbols).

Martin

On 11/04/2018 12:32 AM, Andi Kleen wrote:

> From: Andi Kleen <[hidden email]>
>
> Add a new pass to automatically instrument changes to variables
> with the new PTWRITE instruction on x86. PTWRITE writes a 4 or 8 byte
> field into an Processor Trace log, which allows log over head
> logging of informatin.
>
> This allows to reconstruct how values later, which can be useful for
> debugging or other analysis of the program behavior. With the compiler
> support this can be done with without having to manually add instrumentation
> to the code.
>
> Using dwarf information this can be later mapped back to the variables.
>
> There are new options to enable instrumentation for different types,
> and also a new attribute to control analysis fine grained per
> function or variable level. The attributes can be set on both
> the variable and the type level, and also on structure fields.
> This allows to enable tracing only for specific code in large
> programs.
>
> The pass is generic, but only the x86 backend enables the necessary
> hooks. When the backend enables the necessary hooks (with -mptwrite)
> there is an additional pass that looks through the code for
> attribute vartrace enabled functions or variables.
>
> The -fvartrace-locals options is experimental: it works, but it
> generates redundant ptwrites because the pass doesn't use
> the SSA information to minimize instrumentation. This could be optimized
> later.
>
> Currently the code can be tested with SDE, or on a Intel
> Gemini Lake system with a new enough Linux kernel (v4.10+)
> that supports PTWRITE for PT. Linux perf can be used to
> record the values
>
> perf record -e intel_pt/ptw=1,branch=0/ program
> perf script --itrace=crw -F +synth ...
>
> I have an experimential version of perf that can also use
> dwarf information to symbolize many[1] values back to their variable
> names. So far it is not in standard perf, but available at
>
> https://git.kernel.org/pub/scm/linux/kernel/git/ak/linux-misc.git/log/?h=perf/var-resolve-4
>
> It is currently not able to decode all variable locations to names,
> but a large subset.
>
> Longer term hopefully gdb will support this information too.
>
> The CPU can potentially generate very data high bandwidths when
> code doing a lot of computation is heavily instrumented.
> This can cause some data loss in both the CPU and also in perf
> logging the data when the disk cannot keep up.
>
> Running some larger workloads most workloads do not cause
> CPU level overflows, but I've seen it with -fvartrace
> with crafty, and with more workloads with -fvartrace-locals.
>
> Recommendation is to not fully instrument programs,
> but only areas of interest either at the file level or using
> the attributes.
>
> The other thing is that perf and the disk often cannot keep up
> with the data bandwidth for longer computations. In this case
> it's possible to use perf snapshot mode (add --snapshot
> to the command line above). The data will be only logged to
> a memory ring buffer then, and only dump the buffers on events
> of interest by sending SIGUSR2 to the perf binrary.
>
> In the future this will be hopefully better supported with
> core files and gdb.
>
> Passes bootstrap and test suite on x86_64-linux, also
> bootstrapped and tested gcc itself with full -fvartrace
> and -fvartrace-locals instrumentation.
>
> gcc/:
>
> 2018-11-03  Andi Kleen  <[hidden email]>
>
> * Makefile.in: Add tree-vartrace.o.
> * common.opt: Add -fvartrace, -fvartrace-returns,
> -fvartrace-args, -fvartrace-reads, -fvartrace-writes,
> -fvartrace-locals
> * config/i386/i386.c (ix86_vartrace_func): Add.
> (TARGET_VARTRACE_FUNC): Add.
> * doc/extend.texi: Document vartrace/no_vartrace
> attributes.
> * doc/invoke.texi: Document -fvartrace, -fvartrace-returns,
> -fvartrace-args, -fvartrace-reads, -fvartrace-writes,
> -fvartrace-locals
> * doc/tm.texi (TARGET_VARTRACE_FUNC): Add.
> * passes.def: Add vartrace pass.
> * target.def (vartrace_func): Add.
> * tree-pass.h (make_pass_vartrace): Add.
> * tree-vartrace.c: New file to implement vartrace pass.
>
> gcc/c-family/:
>
> 2018-11-03  Andi Kleen  <[hidden email]>
>
> * c-attribs.c (handle_vartrace_attribute): New function.
>
> config/:
>
> 2018-11-03  Andi Kleen  <[hidden email]>
>
> * bootstrap-vartrace.mk: New.
> * bootstrap-vartrace-locals.mk: New.
> ---
>  config/bootstrap-vartrace-locals.mk |   3 +
>  config/bootstrap-vartrace.mk        |   3 +
>  gcc/Makefile.in                     |   1 +
>  gcc/c-family/c-attribs.c            |  23 ++
>  gcc/common.opt                      |  24 ++
>  gcc/config/i386/i386.c              |  16 +
>  gcc/doc/extend.texi                 |  13 +
>  gcc/doc/invoke.texi                 |  29 ++
>  gcc/doc/tm.texi                     |   4 +
>  gcc/doc/tm.texi.in                  |   2 +
>  gcc/passes.def                      |   1 +
>  gcc/target.def                      |   7 +
>  gcc/tree-pass.h                     |   1 +
>  gcc/tree-vartrace.c                 | 463 ++++++++++++++++++++++++++++
>  14 files changed, 590 insertions(+)
>  create mode 100644 config/bootstrap-vartrace-locals.mk
>  create mode 100644 config/bootstrap-vartrace.mk
>  create mode 100644 gcc/tree-vartrace.c
>
> diff --git a/config/bootstrap-vartrace-locals.mk b/config/bootstrap-vartrace-locals.mk
> new file mode 100644
> index 00000000000..c6c79e21120
> --- /dev/null
> +++ b/config/bootstrap-vartrace-locals.mk
> @@ -0,0 +1,3 @@
> +STAGE2_CFLAGS += -mptwrite -fvartrace -fvartrace-locals
> +STAGE3_CFLAGS += -mptwrite -fvartrace -fvartrace-locals
> +STAGE4_CFLAGS += -mptwrite -fvartrace -fvartrace-locals
> diff --git a/config/bootstrap-vartrace.mk b/config/bootstrap-vartrace.mk
> new file mode 100644
> index 00000000000..e29824d799b
> --- /dev/null
> +++ b/config/bootstrap-vartrace.mk
> @@ -0,0 +1,3 @@
> +STAGE2_CFLAGS += -mptwrite -fvartrace
> +STAGE3_CFLAGS += -mptwrite -fvartrace
> +STAGE4_CFLAGS += -mptwrite -fvartrace
> diff --git a/gcc/Makefile.in b/gcc/Makefile.in
> index 719a516c356..46aa4800e57 100644
> --- a/gcc/Makefile.in
> +++ b/gcc/Makefile.in
> @@ -1594,6 +1594,7 @@ OBJS = \
>   tree-vectorizer.o \
>   tree-vector-builder.o \
>   tree-vrp.o \
> + tree-vartrace.o \
>   tree.o \
>   typed-splay-tree.o \
>   unique-ptr-tests.o \
> diff --git a/gcc/c-family/c-attribs.c b/gcc/c-family/c-attribs.c
> index 4416b5042f7..66bbd87921f 100644
> --- a/gcc/c-family/c-attribs.c
> +++ b/gcc/c-family/c-attribs.c
> @@ -104,6 +104,8 @@ static tree handle_tls_model_attribute (tree *, tree, tree, int,
>   bool *);
>  static tree handle_no_instrument_function_attribute (tree *, tree,
>       tree, int, bool *);
> +static tree handle_vartrace_attribute (tree *, tree,
> +     tree, int, bool *);
>  static tree handle_no_profile_instrument_function_attribute (tree *, tree,
>       tree, int, bool *);
>  static tree handle_malloc_attribute (tree *, tree, tree, int, bool *);
> @@ -325,6 +327,12 @@ const struct attribute_spec c_common_attribute_table[] =
>    { "no_instrument_function", 0, 0, true,  false, false, false,
>        handle_no_instrument_function_attribute,
>        NULL },
> +  { "vartrace",      0, 0, false,  false, false, false,
> +      handle_vartrace_attribute,
> +      NULL },
> +  { "no_vartrace",      0, 0, false,  false, false, false,
> +      handle_vartrace_attribute,
> +      NULL },
>    { "no_profile_instrument_function",  0, 0, true, false, false, false,
>        handle_no_profile_instrument_function_attribute,
>        NULL },
> @@ -767,6 +775,21 @@ handle_no_sanitize_undefined_attribute (tree *node, tree name, tree, int,
>    return NULL_TREE;
>  }
>
> +/* Handle "vartrace"/"no_vartrace" attributes; arguments as in
> +   struct attribute_spec.handler.  */
> +
> +static tree
> +handle_vartrace_attribute (tree *node, tree, tree, int flags,
> +   bool *)
> +{
> +  if (TYPE_P (*node) && !(flags & (int) ATTR_FLAG_TYPE_IN_PLACE))
> +    *node = build_variant_type_copy (*node);
> +
> +  /* Can apply to types, functions, variables.  */
> +  /* We lookup it up later with lookup_attribute.  */
> +  return NULL_TREE;
> +}
> +
>  /* Handle an "asan odr indicator" attribute; arguments as in
>     struct attribute_spec.handler.  */
>
> diff --git a/gcc/common.opt b/gcc/common.opt
> index 2971dc21b1f..930acf40588 100644
> --- a/gcc/common.opt
> +++ b/gcc/common.opt
> @@ -2811,6 +2811,30 @@ ftree-scev-cprop
>  Common Report Var(flag_tree_scev_cprop) Init(1) Optimization
>  Enable copy propagation of scalar-evolution information.
>
> +fvartrace
> +Common Report Var(flag_vartrace)
> +Generate all variable tracking instrumentations, except for locals.
> +
> +fvartrace-returns
> +Common Report Var(flag_vartrace_returns)
> +Generate variable tracking instructions for function returns.
> +
> +fvartrace-args
> +Common Report Var(flag_vartrace_args)
> +Generate variable tracking instructions for function arguments.
> +
> +fvartrace-reads
> +Common Report Var(flag_vartrace_reads)
> +Generate variable tracking instructions for reads.
> +
> +fvartrace-writes
> +Common Report Var(flag_vartrace_writes)
> +Generate variable tracking instructions for writes.
> +
> +fvartrace-locals
> +Common Report Var(flag_vartrace_locals)
> +Generate variable tracking instructions for locals.
> +
>  ; -fverbose-asm causes extra commentary information to be produced in
>  ; the generated assembly code (to make it more readable).  This option
>  ; is generally only of use to those who actually need to read the
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index 490bb6292a8..4337121c239 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -31873,6 +31873,19 @@ ix86_mangle_function_version_assembler_name (tree decl, tree id)
>  }
>
>
> +static tree
> +ix86_vartrace_func (tree type)
> +{
> +  if (!(ix86_isa_flags2 & OPTION_MASK_ISA_PTWRITE))
> +    return NULL;
> +  if (TYPE_PRECISION (type) == 32)
> +    return ix86_builtins [(int) IX86_BUILTIN_PTWRITE32];
> +  else if (TYPE_PRECISION (type) == 64)
> +    return ix86_builtins [(int) IX86_BUILTIN_PTWRITE64];
> +  else
> +    return NULL;
> +}
> +
>  static tree
>  ix86_mangle_decl_assembler_name (tree decl, tree id)
>  {
> @@ -50849,6 +50862,9 @@ ix86_run_selftests (void)
>  #undef TARGET_ASAN_SHADOW_OFFSET
>  #define TARGET_ASAN_SHADOW_OFFSET ix86_asan_shadow_offset
>
> +#undef TARGET_VARTRACE_FUNC
> +#define TARGET_VARTRACE_FUNC ix86_vartrace_func
> +
>  #undef TARGET_GIMPLIFY_VA_ARG_EXPR
>  #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
>
> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
> index 1eca009e255..08286aa4591 100644
> --- a/gcc/doc/extend.texi
> +++ b/gcc/doc/extend.texi
> @@ -3193,6 +3193,13 @@ the standard C library can be guaranteed not to throw an exception
>  with the notable exceptions of @code{qsort} and @code{bsearch} that
>  take function pointer arguments.
>
> +@item no_vartrace
> +@cindex @code{no_vartrace} function or variable attribute
> +Disable data tracing for the function or variable or structured field
> +marked with this attribute. Applies to types. Currently implemented
> +for x86 when the @option{ptwrite} target option is enabled for systems
> +that support the @code{PTWRITE} instruction.
> +
>  @item optimize (@var{level}, @dots{})
>  @item optimize (@var{string}, @dots{})
>  @cindex @code{optimize} function attribute
> @@ -3454,6 +3461,12 @@ When applied to a member function of a C++ class template, the
>  attribute also means that the function is instantiated if the
>  class itself is instantiated.
>
> +@item vartrace
> +@cindex @code{vartrace} function or variable attribute
> +Enable data tracing for the function or variable or structure field
> +marked with this attribute. Applies to types. Will not trace locals,
> +but arguments, returns, globals, pointer references.
> +
>  @item visibility ("@var{visibility_type}")
>  @cindex @code{visibility} function attribute
>  This attribute affects the linkage of the declaration to which it is attached.
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index cb5bc7bafc5..2f10b3c1023 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -2720,6 +2720,35 @@ Don't use the @code{__cxa_get_exception_ptr} runtime routine.  This
>  causes @code{std::uncaught_exception} to be incorrect, but is necessary
>  if the runtime routine is not available.
>
> +@item -fvartrace
> +@opindex -fvartrace
> +Insert trace instructions to trace variable values at runtime.
> +Requires enabling a backend specific option, like @option{-mptwrite} to enable
> +@code{PTWRITE} instruction generation on x86. @option{-fvartrace} traces
> +arguments, return values, pointer references and globals, but no locals.
> +
> +@item -fvartrace-args
> +@opindex -fvartrace-args
> +Trace arguments. Can be used independently or together with @option{-vartrace},
> +or as @option{-fno-vartrace-args} to disable.
> +
> +@item -fvartrace-returns
> +@opindex -fvartrace-returns
> +Trace return values.  Can be used independently or together with @option{-vartrace},
> +or as @option{-fno-vartrace-return} to disable.
> +
> +@item -fvartrace-reads
> +@opindex -fvartrace-reads
> +Trace reads.
> +
> +@item -fvartrace-writes
> +@opindex -fvartrace-writes
> +Trace writes.
> +
> +@item -fvartrace-locals
> +@opindex -fvartrace-locals
> +Insert code to trace local variables. This can have high overhead.
> +
>  @item -fvisibility-inlines-hidden
>  @opindex fvisibility-inlines-hidden
>  This switch declares that the user does not attempt to compare
> diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
> index f841527f971..6555cb122e9 100644
> --- a/gcc/doc/tm.texi
> +++ b/gcc/doc/tm.texi
> @@ -11933,6 +11933,10 @@ Address Sanitizer shadow memory address.  NULL if Address Sanitizer is not
>  supported by the target.
>  @end deftypefn
>
> +@deftypefn {Target Hook} tree TARGET_VARTRACE_FUNC (tree @var{type})
> +Return a builtin to call to trace variables or NULL if not supported by the target.
> +@end deftypefn
> +
>  @deftypefn {Target Hook} {unsigned HOST_WIDE_INT} TARGET_MEMMODEL_CHECK (unsigned HOST_WIDE_INT @var{val})
>  Validate target specific memory model mask bits. When NULL no target specific
>  memory model bits are allowed.
> diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
> index 967ef3ad22f..7cce21bb26c 100644
> --- a/gcc/doc/tm.texi.in
> +++ b/gcc/doc/tm.texi.in
> @@ -8101,6 +8101,8 @@ and the associated definitions of those functions.
>
>  @hook TARGET_ASAN_SHADOW_OFFSET
>
> +@hook TARGET_VARTRACE_FUNC
> +
>  @hook TARGET_MEMMODEL_CHECK
>
>  @hook TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
> diff --git a/gcc/passes.def b/gcc/passes.def
> index 24f212c8e31..518cb4ef6f7 100644
> --- a/gcc/passes.def
> +++ b/gcc/passes.def
> @@ -179,6 +179,7 @@ along with GCC; see the file COPYING3.  If not see
>    NEXT_PASS (pass_oacc_device_lower);
>    NEXT_PASS (pass_omp_device_lower);
>    NEXT_PASS (pass_omp_target_link);
> +  NEXT_PASS (pass_vartrace);
>    NEXT_PASS (pass_all_optimizations);
>    PUSH_INSERT_PASSES_WITHIN (pass_all_optimizations)
>        NEXT_PASS (pass_remove_cgraph_callee_edges);
> diff --git a/gcc/target.def b/gcc/target.def
> index ad27d352ca4..db5d88efb95 100644
> --- a/gcc/target.def
> +++ b/gcc/target.def
> @@ -4300,6 +4300,13 @@ supported by the target.",
>   unsigned HOST_WIDE_INT, (void),
>   NULL)
>
> +/* Defines the builtin to trace variables, or NULL.  */
> +DEFHOOK
> +(vartrace_func,
> + "Return a builtin to call to trace variables or NULL if not supported by the target.",
> + tree, (tree type),
> + NULL)
> +
>  /* Functions relating to calls - argument passing, returns, etc.  */
>  /* Members of struct call have no special macro prefix.  */
>  HOOK_VECTOR (TARGET_CALLS, calls)
> diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
> index af15adc8e0c..2cf31785a6f 100644
> --- a/gcc/tree-pass.h
> +++ b/gcc/tree-pass.h
> @@ -423,6 +423,7 @@ extern gimple_opt_pass *make_pass_strlen (gcc::context *ctxt);
>  extern gimple_opt_pass *make_pass_fold_builtins (gcc::context *ctxt);
>  extern gimple_opt_pass *make_pass_post_ipa_warn (gcc::context *ctxt);
>  extern gimple_opt_pass *make_pass_stdarg (gcc::context *ctxt);
> +extern gimple_opt_pass *make_pass_vartrace (gcc::context *ctxt);
>  extern gimple_opt_pass *make_pass_early_warn_uninitialized (gcc::context *ctxt);
>  extern gimple_opt_pass *make_pass_late_warn_uninitialized (gcc::context *ctxt);
>  extern gimple_opt_pass *make_pass_cse_reciprocals (gcc::context *ctxt);
> diff --git a/gcc/tree-vartrace.c b/gcc/tree-vartrace.c
> new file mode 100644
> index 00000000000..07f5aa6bc8f
> --- /dev/null
> +++ b/gcc/tree-vartrace.c
> @@ -0,0 +1,463 @@
> +/* Insert instructions for data value tracing.
> +   Copyright (C) 2017 Free Software Foundation, Inc.
> +   Contributed by Andi Kleen.
> +
> +This file is part of GCC.
> +
> +GCC is free software; you can redistribute it and/or modify
> +it under the terms of the GNU General Public License as published by
> +the Free Software Foundation; either version 3, or (at your option)
> +any later version.
> +
> +GCC is distributed in the hope that it will be useful,
> +but WITHOUT ANY WARRANTY; without even the implied warranty of
> +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +GNU General Public License for more details.
> +
> +You should have received a copy of the GNU General Public License
> +along with GCC; see the file COPYING3.  If not see
> +<http://www.gnu.org/licenses/>.  */
> +
> +#include "config.h"
> +#include "system.h"
> +#include "coretypes.h"
> +#include "backend.h"
> +#include "target.h"
> +#include "tree.h"
> +#include "tree-iterator.h"
> +#include "tree-pass.h"
> +#include "basic-block.h"
> +#include "gimple.h"
> +#include "gimple-iterator.h"
> +#include "gimplify.h"
> +#include "gimplify-me.h"
> +#include "gimple-ssa.h"
> +#include "gimple-pretty-print.h"
> +#include "cfghooks.h"
> +#include "ssa.h"
> +#include "tree-dfa.h"
> +#include "attribs.h"
> +
> +enum attrstate { force_off, force_on, neutral };
> +
> +/* Can we trace with attributes ATTR.  */
> +
> +static attrstate supported_attr (tree attr)
> +{
> +  if (lookup_attribute ("no_vartrace", attr))
> +    return force_off;
> +  if (lookup_attribute ("vartrace", attr))
> +    return force_on;
> +  return neutral;
> +}
> +
> +/* Is ARG supported considering S, handling both decls and other trees.  */
> +
> +static attrstate supported_op (tree arg, attrstate s)
> +{
> +  if (s != neutral)
> +    return s;
> +  if (DECL_P (arg))
> +    {
> +      s = supported_attr (DECL_ATTRIBUTES (arg));
> +      if (s != neutral)
> + return s;
> +    }
> +  return supported_attr (TYPE_ATTRIBUTES (TREE_TYPE (arg)));
> +}
> +
> +/* Can we trace T.  */
> +
> +static attrstate supported_type (tree t)
> +{
> +  tree type = TREE_TYPE (t);
> +
> +  if (!POINTER_TYPE_P (type) && !INTEGRAL_TYPE_P (type))
> +    return force_off;
> +  enum attrstate s = supported_op (t, neutral);
> +  if (TREE_CODE (t) == COMPONENT_REF
> +   || TREE_CODE (t) == ARRAY_REF)
> +    {
> +      s = supported_op (TREE_OPERAND (t, 0), s);
> +      s = supported_op (TREE_OPERAND (t, 1), s);
> +    }
> +  return s;
> +}
> +
> +/* Can we trace T, or if FORCE is set.  */
> +
> +static bool supported_type_or_force (tree t, bool force)
> +{
> +  enum attrstate s = supported_type (t);
> +  if (s == neutral)
> +    return force;
> +  return s == force_off ? false : true;
> +}
> +
> +/* Return true if T refering to a local variable.
> +   ?? better ways to do this?  */
> +
> +static bool is_local (tree t)
> +{
> +  // Add another attribute to override?
> +  if (!flag_vartrace_locals)
> +    return false;
> +  if (TREE_STATIC (t))
> +    return false;
> +  if (TREE_CODE (t) == VAR_DECL && DECL_EXTERNAL (t))
> +    return false;
> +  return true;
> +}
> +
> +/* Is T something we can log, FORCEing the type if needed.  */
> +
> +static bool supported_mem (tree t, bool force)
> +{
> +  enum attrstate s = supported_type (t);
> +
> +  if (s == force_off)
> +    return false;
> +
> +  switch (TREE_CODE (t))
> +    {
> +    case VAR_DECL:
> +      if (DECL_ARTIFICIAL (t))
> + return false;
> +      if (is_local (t))
> + return true;
> +      return s == force_on || force;
> +
> +    case ARRAY_REF:
> +    case COMPONENT_REF:
> +      t = TREE_OPERAND (t, 0);
> +      if (is_local (t))
> + return true;
> +      return s == force_on || force;
> +
> +    case TARGET_MEM_REF:
> +    case MEM_REF:
> +      // could use points-to to check for locals?
> +      return true;
> +
> +    case SSA_NAME:
> +      if (flag_vartrace_locals && is_gimple_reg (t))
> + return true;
> +      break;
> +
> +    default:
> +      break;
> +    }
> +
> +  return false;
> +}
> +
> +/* Print debugging for inserting CALL at ORIG_STMT with type of VAL.  */
> +
> +static void log_trace_code (gimple *orig_stmt, gimple *code,
> +    tree val)
> +{
> +  if (dump_file)
> +    {
> +      if (orig_stmt)
> + fprintf (dump_file, "BB%d ", gimple_bb (orig_stmt)->index);
> +      fprintf (dump_file, "inserting ");
> +      print_gimple_stmt (dump_file, code, 0, TDF_VOPS|TDF_MEMSYMS);
> +      if (orig_stmt)
> + {
> +  fprintf (dump_file, "orig ");
> +  print_gimple_stmt (dump_file, orig_stmt, 2,
> +     TDF_VOPS|TDF_MEMSYMS);
> + }
> +      fprintf (dump_file, "type ");
> +      print_generic_expr (dump_file, TREE_TYPE (val), TDF_SLIM);
> +      fputc ('\n', dump_file);
> +      fputc ('\n', dump_file);
> +    }
> +}
> +
> +/* Insert variable tracing code for VAL before iterator GI, originally
> +   for ORIG_STMT.  Return trace variable or NULL.  */
> +
> +static tree insert_trace (gimple_stmt_iterator *gi, tree val,
> +  gimple *orig_stmt)
> +{
> +  tree func = targetm.vartrace_func (TREE_TYPE (val));
> +  if (!func)
> +    return NULL_TREE;
> +
> +  location_t loc = gimple_location (orig_stmt);
> +
> +  gimple_seq seq = NULL;
> +  tree tvar = make_ssa_name (TREE_TYPE (val));
> +  gassign *assign = gimple_build_assign (tvar, val);
> +  log_trace_code (orig_stmt, assign, val);
> +  gimple_set_location (assign, loc);
> +  gimple_seq_add_stmt (&seq, assign);
> +
> +  gcall *call = gimple_build_call (func, 1, tvar);
> +  log_trace_code (NULL, call, tvar);
> +  gimple_set_location (call, loc);
> +  gimple_seq_add_stmt (&seq, call);
> +
> +  gsi_insert_seq_before (gi, seq, GSI_SAME_STMT);
> +  return tvar;
> +}
> +
> +/* Insert trace at GI for T in FUN if suitable memory or variable reference.
> +   Always if FORCE. Originally on ORIG_STMT.  */
> +
> +tree instrument_mem (gimple_stmt_iterator *gi, tree t,
> +     bool force,
> +     gimple *orig_stmt)
> +{
> +  if (supported_mem (t, force))
> +    return insert_trace (gi, t, orig_stmt);
> +  return NULL_TREE;
> +}
> +
> +/* Instrument arguments for FUN considering FORCE. Return true if
> +   function has changed.  */
> +
> +bool instrument_args (function *fun, bool force)
> +{
> +  bool changed = false;
> +  gimple_stmt_iterator gi;
> +
> +  /* Local tracing usually takes care of the argument too, when
> +     they are read. This avoids redundant trace instructions.  */
> +  if (flag_vartrace_locals)
> +    return false;
> +
> +  for (tree arg = DECL_ARGUMENTS (current_function_decl);
> +       arg != NULL_TREE;
> +       arg = DECL_CHAIN (arg))
> +    {
> +     gi = gsi_start_bb (BASIC_BLOCK_FOR_FN (fun, NUM_FIXED_BLOCKS));
> +     if (supported_type_or_force (arg, force || flag_vartrace_args))
> + {
> +  tree func = targetm.vartrace_func (TREE_TYPE (arg));
> +  if (!func)
> +    continue;
> +
> +  tree sarg = NULL;
> +  // ??? or force like sanopt?
> +  if (is_gimple_reg (arg))
> +    sarg = get_or_create_ssa_default_def (fun, arg);
> +  if (!sarg)
> +    continue;
> +
> +  if (has_zero_uses (sarg))
> +    continue;
> +
> +  gimple_seq seq = NULL;
> +  tree tvar = make_ssa_name (TREE_TYPE (sarg));
> +  gassign *assign = gimple_build_assign (tvar, sarg);
> +  gimple_set_location (assign, fun->function_start_locus);
> +  gimple_seq_add_stmt (&seq, assign);
> +
> +  gcall *call = gimple_build_call (func, 1, tvar);
> +  log_trace_code (NULL, call, tvar);
> +  gimple_set_location (call, fun->function_start_locus);
> +  gimple_seq_add_stmt (&seq, call);
> +
> +  edge edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (fun));
> +  gsi_insert_seq_on_edge_immediate (edge, seq);
> +
> +  changed = true;
> + }
> +    }
> +  return changed;
> +}
> +
> +/* Generate trace call for store STMT at GI, force if FORCE.  Return true
> +   if successfull. Modifies the original store to use a temporary.  */
> +
> +static bool instrument_store (gimple_stmt_iterator *gi, gimple *stmt, bool force)
> +{
> +  if (!supported_mem (gimple_assign_lhs (stmt), force))
> +    return false;
> +
> +  tree orig_tgt = gimple_assign_lhs (stmt);
> +
> +  tree func = targetm.vartrace_func (TREE_TYPE (orig_tgt));
> +  if (!func)
> +    return false;
> +
> +  tree new_tgt = make_ssa_name(TREE_TYPE (orig_tgt));
> +  gimple_assign_set_lhs (stmt, new_tgt);
> +  update_stmt (stmt);
> +  log_trace_code (NULL, stmt, new_tgt);
> +
> +  gcall *tcall = gimple_build_call (func, 1, new_tgt);
> +  log_trace_code (stmt, tcall, new_tgt);
> +  gimple_set_location (tcall, gimple_location (stmt));
> +  gsi_insert_after (gi, tcall, GSI_CONTINUE_LINKING);
> +
> +  gassign *new_store = gimple_build_assign (orig_tgt, new_tgt);
> +  gimple_set_location (new_store, gimple_location (stmt));
> +  log_trace_code (NULL, new_store, new_tgt);
> +  gsi_insert_after (gi, new_store, GSI_CONTINUE_LINKING);
> +  return true;
> +}
> +
> +/* Instrument STMT at GI. Force if FORCE. CHANGED is the previous changed
> +   state, which is also returned.  */
> +
> +bool instrument_assign (gimple_stmt_iterator *gi,
> + gimple *stmt, bool changed, bool force)
> +{
> +  gassign *gas = as_a <gassign *> (stmt);
> +  bool read_force = force || flag_vartrace_reads;
> +  tree t;
> +
> +  t = instrument_mem (gi, gimple_assign_rhs1 (gas),
> +      read_force,
> +      stmt);
> +  if (t)
> +    {
> +      gimple_assign_set_rhs1 (gas, t);
> +      changed = true;
> +    }
> +  if (gimple_num_ops (gas) > 2)
> +    {
> +      t = instrument_mem (gi, gimple_assign_rhs2 (gas),
> +  read_force,
> +  stmt);
> +      if (t)
> + {
> +  gimple_assign_set_rhs2 (gas, t);
> +  changed = true;
> + }
> +    }
> +  if (gimple_num_ops (gas) > 3)
> +    {
> +      t = instrument_mem (gi, gimple_assign_rhs3 (gas),
> +  read_force,
> +  stmt);
> +      if (t)
> + {
> +  gimple_assign_set_rhs3 (gas, t);
> +  changed = true;
> + }
> +      }
> +  if (gimple_num_ops (gas) > 4)
> +    gcc_unreachable ();
> +  if (gimple_store_p (stmt))
> +    changed |= instrument_store (gi, stmt, flag_vartrace_writes || force);
> +  if (changed)
> +    update_stmt (stmt);
> +  return changed;
> +}
> +
> +/* Instrument return in function FUN at statement STMT at GI, force if
> +   FORCE.  CHANGED is the changed flag, which is also returned.  */
> +
> +static bool instrument_return (function *fun,
> +       gimple_stmt_iterator *gi,
> +       gimple *stmt, bool changed,
> +       bool force)
> +{
> +  tree restype = TREE_TYPE (TREE_TYPE (fun->decl));
> +  greturn *gret = as_a <greturn *> (stmt);
> +  tree rval = gimple_return_retval (gret);
> +
> +  /* Cannot handle complex C++ return values at this point, even
> +     if they would collapse to a valid trace type.  */
> +  if (rval
> +      && useless_type_conversion_p (restype, TREE_TYPE (rval))
> +      && supported_type_or_force (rval, flag_vartrace_returns || force))
> +    {
> +      if (tree tvar = insert_trace (gi, rval, stmt))
> + {
> +  changed = true;
> +  gimple_return_set_retval (gret, tvar);
> +  log_trace_code (NULL, gret, tvar);
> +  update_stmt (stmt);
> + }
> +    }
> +
> +  return changed;
> +}
> +
> +/* Insert vartrace calls for FUN.  */
> +
> +static unsigned int vartrace_execute (function *fun)
> +{
> +  basic_block bb;
> +  gimple_stmt_iterator gi;
> +  bool force = flag_vartrace;
> +  bool changed;
> +
> +  if (lookup_attribute ("vartrace", TYPE_ATTRIBUTES (TREE_TYPE (fun->decl)))
> +      || lookup_attribute ("vartrace", DECL_ATTRIBUTES (fun->decl)))
> +    force = true;
> +
> +  changed = instrument_args (fun, force);
> +
> +  FOR_ALL_BB_FN (bb, fun)
> +    for (gi = gsi_start_bb (bb); !gsi_end_p (gi); gsi_next (&gi))
> +      {
> + gimple *stmt = gsi_stmt (gi);
> + if (is_gimple_assign (stmt) && !gimple_clobber_p (stmt))
> +  changed = instrument_assign (&gi, stmt, changed, force);
> + else if (gimple_code (stmt) == GIMPLE_RETURN)
> +  {
> +    changed = instrument_return (fun, &gi, stmt, changed, force);
> +    // must end basic block
> +    break;
> +  }
> +
> + // instrument something else like CALL?
> + // We assume everything interesting is in a GIMPLE_ASSIGN
> +      }
> +  return changed ? TODO_update_ssa : 0;
> +}
> +
> +const pass_data pass_data_vartrace =
> +{
> +  GIMPLE_PASS, /* type */
> +  "vartrace", /* name */
> +  OPTGROUP_NONE, /* optinfo_flags */
> +  TV_NONE, /* tv_id */
> +  0, /* properties_required */
> +  0, /* properties_provided */
> +  0, /* properties_destroyed */
> +  0, /* todo_flags_start */
> +  0, /* todo_flags_finish */
> +};
> +
> +class pass_vartrace : public gimple_opt_pass
> +{
> +public:
> +  pass_vartrace (gcc::context *ctxt)
> +    : gimple_opt_pass (pass_data_vartrace, ctxt)
> +  {}
> +
> +  virtual opt_pass * clone ()
> +    {
> +      return new pass_vartrace (m_ctxt);
> +    }
> +
> +  virtual bool gate (function *fun)
> +    {
> +      // check if vartrace is supported in backend
> +      if (!targetm.vartrace_func ||
> +  targetm.vartrace_func (integer_type_node) == NULL)
> + return false;
> +
> +      if (lookup_attribute ("no_vartrace", TYPE_ATTRIBUTES (TREE_TYPE (fun->decl)))
> +  || lookup_attribute ("no_vartrace", DECL_ATTRIBUTES (fun->decl)))
> + return false;
> +
> +      // need to run pass always to check for variable attributes
> +      return true;
> +    }
> +
> +  virtual unsigned int execute (function *f) { return vartrace_execute (f); }
> +};
> +
> +gimple_opt_pass *
> +make_pass_vartrace (gcc::context *ctxt)
> +{
> +  return new pass_vartrace (ctxt);
> +}
>

Reply | Threaded
Open this post in threaded view
|

Re: [PATCH 2/3] Add a pass to automatically add ptwrite instrumentation

Andi Kleen-5
On Sun, Nov 11, 2018 at 11:37:57AM -0700, Martin Sebor wrote:
> One other high-level comment: a more powerful interface to
> variable tracing than annotating declarations in the source
> would be to provide either the names of the symbols to trace
> on the command line or in an external file.  That way tracing
> could be enabled for objects and types declared in read-only
> files (such as system headers), and would let the user more
> easily experiment with annotations.

For variables/functions if you add at the end of the source file

typeof(foo) __attribute__(("vartrace"));

it should enable it in theory (haven't tested) for both
variables or functions. Not sure about types, probably not,
but that might not be needed.

But it has to be at the end of the file, so -include doesn't work.
If an -include-after would be added to the preprocessor
it would work.

> This could be in addition to the attributes, and would require
> coming up with a way of identifying symbols with internal or
> no linkage, such as local variables, and perhaps also function

Individual local variables are hard, but you could likely
enable tracing for everything in the function with the
attribute trick above.

-Andi
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH 2/3] Add a pass to automatically add ptwrite instrumentation

Andi Kleen-5
In reply to this post by Richard Biener-2
On Sun, Nov 11, 2018 at 10:06:21AM +0100, Richard Biener wrote:
> That is, usually debuggers look for a location list of a variable
> and find, say, %rax.  But for ptwrite the debugger needs to
> examine all active location lists for, say, %rax and figure out
> that it contains the value for variable 'a'?

In dwarf output you end up with a list of

start-IP...stop-IP <dwarf scope>...  variable locations

Both the original load/store and PTWRITE are in the same scope,
and the debugger just looks it up based on the IP,
so it all works without any extra modifications.

I even had an earlier version of this that instrumented
assembler output of the compiler with PTWRITE in a separate script,
and it worked fine too.
>
> When there isn't any such relation between the ptwrite stored
> value and any variable the ptwrite is useless, right?

A programmer might still be able to make use of it
based on the context or the order.

e.g. if you don't instrument everything, but only specific
variables, or you only instrument arguments and returns or
similar then it could be still useful just based on the IP->symbol
resolution. If you instrument too many things yes it will be
hard to use without debug info resolution.

> I hope you don't mind if this eventually slips to GCC 10 given
> as you say there is no HW available right now.  (still waiting
> for a CPU with CET ...)

:-/

Actually there is.  Gemini Lake Atom Hardware with Goldmont Plus
is shipping for some time and you can buy them.

-Andi
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH 2/3] Add a pass to automatically add ptwrite instrumentation

Richard Biener-2
On Mon, Nov 12, 2018 at 4:16 AM Andi Kleen <[hidden email]> wrote:

>
> On Sun, Nov 11, 2018 at 10:06:21AM +0100, Richard Biener wrote:
> > That is, usually debuggers look for a location list of a variable
> > and find, say, %rax.  But for ptwrite the debugger needs to
> > examine all active location lists for, say, %rax and figure out
> > that it contains the value for variable 'a'?
>
> In dwarf output you end up with a list of
>
> start-IP...stop-IP <dwarf scope>...  variable locations
>
> Both the original load/store and PTWRITE are in the same scope,
> and the debugger just looks it up based on the IP,
> so it all works without any extra modifications.

Yes, that's how I thought it would work.

> I even had an earlier version of this that instrumented
> assembler output of the compiler with PTWRITE in a separate script,
> and it worked fine too.

Apart from eventually messing up branch range restrictions I guess ;)

> >
> > When there isn't any such relation between the ptwrite stored
> > value and any variable the ptwrite is useless, right?
>
> A programmer might still be able to make use of it
> based on the context or the order.

OK.

> e.g. if you don't instrument everything, but only specific
> variables, or you only instrument arguments and returns or
> similar then it could be still useful just based on the IP->symbol
> resolution. If you instrument too many things yes it will be
> hard to use without debug info resolution.

Did you gather any statistics on how many ptwrite instructions
that are generated by your patch are not covered by any
location range & expr?  I assume ptwrite is writing from register
input only so you probably should avoid instrumenting writes
of constants (will require an extra register)?

How does the .text size behave say for cc1 when you enable
the various granularities of instrumentation?  How many
ptwrite instructions are there per 100 regular instructions?

> > I hope you don't mind if this eventually slips to GCC 10 given
> > as you say there is no HW available right now.  (still waiting
> > for a CPU with CET ...)
>
> :-/
>
> Actually there is.  Gemini Lake Atom Hardware with Goldmont Plus
> is shipping for some time and you can buy them.

Ah, interesting.

Can we get an updated patch based on my review?

I still think we should eventually move the pass later and somehow
avoid instrumenting places we'll not have any meaningful locations
in the debug info - if only to reduce required trace bandwith.

Thanks,
Richard.

> -Andi
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH 2/3] Add a pass to automatically add ptwrite instrumentation

Andi Kleen-3
On Tue, Nov 13, 2018 at 09:03:52AM +0100, Richard Biener wrote:
> > I even had an earlier version of this that instrumented
> > assembler output of the compiler with PTWRITE in a separate script,
> > and it worked fine too.
>
> Apart from eventually messing up branch range restrictions I guess ;)

You mean for LOOP? For everything else the assembler handles it I
believe.

> Did you gather any statistics on how many ptwrite instructions
> that are generated by your patch are not covered by any
> location range & expr?  

Need to look into that. Any suggestions how to do it in the compiler?

I had some decode failures with the perf dwarf decoder,
but I was usually blaming them on perf dwarf limitations.

> I assume ptwrite is writing from register
> input only so you probably should avoid instrumenting writes
> of constants (will require an extra register)?

Hmm, I think those are needed unfortunately because someone
might want to trace every update of of something. With branch
tracing it could be recreated theoretically but would
be a lot more work for the decoder.

> How does the .text size behave say for cc1 when you enable
> the various granularities of instrumentation?  How many
> ptwrite instructions are there per 100 regular instructions?

With locals tracing (worst case) I see ~23% of all instructions
in cc1 be PTWRITE. Binary is ~27% bigger.

> Can we get an updated patch based on my review?

Yes, working on it, also addressing Martin's comments. Hopefully soon.
>
> I still think we should eventually move the pass later

It's after pass_sanopt now.

> avoid instrumenting places we'll not have any meaningful locations
> in the debug info - if only to reduce required trace bandwith.

Can you suggest how to check that?

-Andi
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH 2/3] Add a pass to automatically add ptwrite instrumentation

Richard Biener-2
On November 13, 2018 7:09:15 PM GMT+01:00, Andi Kleen <[hidden email]> wrote:

>On Tue, Nov 13, 2018 at 09:03:52AM +0100, Richard Biener wrote:
>> > I even had an earlier version of this that instrumented
>> > assembler output of the compiler with PTWRITE in a separate script,
>> > and it worked fine too.
>>
>> Apart from eventually messing up branch range restrictions I guess ;)
>
>You mean for LOOP? For everything else the assembler handles it I
>believe.
>
>> Did you gather any statistics on how many ptwrite instructions
>> that are generated by your patch are not covered by any
>> location range & expr?  
>
>Need to look into that. Any suggestions how to do it in the compiler?

I guess you need to do that in a dwarf decoder somehow.

>I had some decode failures with the perf dwarf decoder,
>but I was usually blaming them on perf dwarf limitations.
>
>> I assume ptwrite is writing from register
>> input only so you probably should avoid instrumenting writes
>> of constants (will require an extra register)?
>
>Hmm, I think those are needed unfortunately because someone
>might want to trace every update of of something. With branch
>tracing it could be recreated theoretically but would
>be a lot more work for the decoder.
>
>> How does the .text size behave say for cc1 when you enable
>> the various granularities of instrumentation?  How many
>> ptwrite instructions are there per 100 regular instructions?
>
>With locals tracing (worst case) I see ~23% of all instructions
>in cc1 be PTWRITE. Binary is ~27% bigger.

OK, I suppose it will get better when addressing some of my review comments.

>> Can we get an updated patch based on my review?
>
>Yes, working on it, also addressing Martin's comments. Hopefully soon.
>>
>> I still think we should eventually move the pass later
>
>It's after pass_sanopt now.
>
>> avoid instrumenting places we'll not have any meaningful locations
>> in the debug info - if only to reduce required trace bandwith.
>
>Can you suggest how to check that?

I'd look at doing the instrumentation after var-tracking has run - that is what computes the locations in the end. That means instrumenting on late RTL after register allocation (and eventually with branch range restrictions in place). Basically you'd instrument at the same time as generating debug info.

Richard.

>-Andi

Reply | Threaded
Open this post in threaded view
|

Re: [PATCH 2/3] Add a pass to automatically add ptwrite instrumentation

Andi Kleen-3
On Tue, Nov 13, 2018 at 07:37:27PM +0100, Richard Biener wrote:
> I'd look at doing the instrumentation after var-tracking has run - that is what computes the locations in the end. That means instrumenting on late RTL after register allocation (and eventually with branch range restrictions in place). Basically you'd instrument at the same time as generating debug info.

Ok that would be a full rewrite. I'll check if it's really a problem
first. I would prefer to stay on the GIMPLE level.

-Andi