From e6745a4da964f5f49b21cbcce5125456984ddc6b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Mon, 19 Dec 2022 13:31:07 -0500 Subject: tracing: Add a way to filter function addresses to function names There have been several times when an event recorded a function address in one of its fields and I needed to filter on that address by function name. It required looking up the function in kallsyms, finding its size, and doing a compare of "field >= function_start && field < function_end". But those addresses change from boot to boot, which makes such scripts unreliable. It is also useful to have this at boot, where the addresses are not yet known. For example, on the boot command line: trace_trigger="initcall_finish.traceoff if func.function == acpi_init" To implement this, add a ".function" suffix that checks that the field is of size long; the only operations allowed (so far) are "==" and "!=". Link: https://lkml.kernel.org/r/20221219183213.916833763@goodmis.org Cc: Masami Hiramatsu Cc: Andrew Morton Cc: Tom Zanussi Cc: Zheng Yejian Reviewed-by: Ross Zwisler Signed-off-by: Steven Rostedt (Google) --- Documentation/trace/events.rst | 12 +++++ kernel/trace/trace_events.c | 2 +- kernel/trace/trace_events_filter.c | 93 +++++++++++++++++++++++++++++++++++++- 3 files changed, 105 insertions(+), 2 deletions(-) diff --git a/Documentation/trace/events.rst b/Documentation/trace/events.rst index c47f381d0c00..d0fd5c7220b7 100644 --- a/Documentation/trace/events.rst +++ b/Documentation/trace/events.rst @@ -207,6 +207,18 @@ field name:: As the kernel will have to know how to retrieve the memory that the pointer is at from user space. +You can convert any long type to a function address and search by function name:: + + call_site.function == security_prepare_creds + +The above will match when the value of the field "call_site" falls within the +function "security_prepare_creds". That is, the filter compares the value of +"call_site" and returns true if it is greater than or equal to the start of +the function "security_prepare_creds" and less than the end of that function. + +The ".function" suffix can only be attached to values of size long, and can only +be compared with "==" or "!=". 
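In other words, the ".function" comparison is a half-open range check. A minimal sketch of the test it performs (mirroring filter_pred_function() in the patch below; the helper name here is illustrative, not part of the patch):

	/* Sketch: the check behind "field.function == name". */
	static bool addr_in_func(unsigned long addr,
				 unsigned long func_start, unsigned long func_end)
	{
		return addr >= func_start && addr < func_end; /* "!=" negates this */
	}
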
+ 5.2 Setting filters ------------------- diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 33e0b4f8ebe6..b07931ad97de 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -2822,7 +2822,7 @@ static __init int setup_trace_triggers(char *str) if (!trigger) break; bootup_triggers[i].event = strsep(&trigger, "."); - bootup_triggers[i].trigger = strsep(&trigger, "."); + bootup_triggers[i].trigger = trigger; if (!bootup_triggers[i].trigger) break; } diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index e095c3b3a50d..1dad64267878 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -64,6 +64,7 @@ enum filter_pred_fn { FILTER_PRED_FN_PCHAR_USER, FILTER_PRED_FN_PCHAR, FILTER_PRED_FN_CPU, + FILTER_PRED_FN_FUNCTION, FILTER_PRED_FN_, FILTER_PRED_TEST_VISITED, }; @@ -71,6 +72,7 @@ enum filter_pred_fn { struct filter_pred { enum filter_pred_fn fn_num; u64 val; + u64 val2; struct regex regex; unsigned short *ops; struct ftrace_event_field *field; @@ -103,6 +105,7 @@ struct filter_pred { C(INVALID_FILTER, "Meaningless filter expression"), \ C(IP_FIELD_ONLY, "Only 'ip' field is supported for function trace"), \ C(INVALID_VALUE, "Invalid value (did you forget quotes)?"), \ + C(NO_FUNCTION, "Function not found"), \ C(ERRNO, "Error"), \ C(NO_FILTER, "No filter found") @@ -876,6 +879,17 @@ static int filter_pred_comm(struct filter_pred *pred, void *event) return cmp ^ pred->not; } +/* Filter predicate for functions. */ +static int filter_pred_function(struct filter_pred *pred, void *event) +{ + unsigned long *addr = (unsigned long *)(event + pred->offset); + unsigned long start = (unsigned long)pred->val; + unsigned long end = (unsigned long)pred->val2; + int ret = *addr >= start && *addr < end; + + return pred->op == OP_EQ ? 
ret : !ret; +} + /* * regex_match_foo - Basic regex callbacks * @@ -1335,6 +1349,8 @@ static int filter_pred_fn_call(struct filter_pred *pred, void *event) return filter_pred_pchar(pred, event); case FILTER_PRED_FN_CPU: return filter_pred_cpu(pred, event); + case FILTER_PRED_FN_FUNCTION: + return filter_pred_function(pred, event); case FILTER_PRED_TEST_VISITED: return test_pred_visited_fn(pred, event); default: @@ -1350,8 +1366,13 @@ static int parse_pred(const char *str, void *data, struct trace_event_call *call = data; struct ftrace_event_field *field; struct filter_pred *pred = NULL; + unsigned long offset; + unsigned long size; + unsigned long ip; char num_buf[24]; /* Big enough to hold an address */ char *field_name; + char *name; + bool function = false; bool ustring = false; char q; u64 val; @@ -1393,6 +1414,12 @@ static int parse_pred(const char *str, void *data, i += len; } + /* See if the field is a kernel function name */ + if ((len = str_has_prefix(str + i, ".function"))) { + function = true; + i += len; + } + while (isspace(str[i])) i++; @@ -1423,7 +1450,71 @@ static int parse_pred(const char *str, void *data, pred->offset = field->offset; pred->op = op; - if (ftrace_event_is_function(call)) { + if (function) { + /* The field must be the same size as long */ + if (field->size != sizeof(long)) { + parse_error(pe, FILT_ERR_ILLEGAL_FIELD_OP, pos + i); + goto err_free; + } + + /* Function only works with '==' or '!=' and an unquoted string */ + switch (op) { + case OP_NE: + case OP_EQ: + break; + default: + parse_error(pe, FILT_ERR_INVALID_OP, pos + i); + goto err_free; + } + + if (isdigit(str[i])) { + /* We allow 0xDEADBEEF */ + while (isalnum(str[i])) + i++; + + len = i - s; + /* 0xfeedfacedeadbeef is 18 chars max */ + if (len >= sizeof(num_buf)) { + parse_error(pe, FILT_ERR_OPERAND_TOO_LONG, pos + i); + goto err_free; + } + + strncpy(num_buf, str + s, len); + num_buf[len] = 0; + + ret = kstrtoul(num_buf, 0, &ip); + if (ret) { + parse_error(pe, FILT_ERR_INVALID_VALUE, pos + i); + goto err_free; + } + } else { + s = i; + for (; str[i] && !isspace(str[i]); i++) + ; + + len = i - s; + name = kmemdup_nul(str + s, len, GFP_KERNEL); + if (!name) + goto err_mem; + ip = kallsyms_lookup_name(name); + kfree(name); + if (!ip) { + parse_error(pe, FILT_ERR_NO_FUNCTION, pos + i); + goto err_free; + } + } + + /* Now find the function start and end address */ + if (!kallsyms_lookup_size_offset(ip, &size, &offset)) { + parse_error(pe, FILT_ERR_NO_FUNCTION, pos + i); + goto err_free; + } + + pred->fn_num = FILTER_PRED_FN_FUNCTION; + pred->val = ip - offset; + pred->val2 = pred->val + size; + + } else if (ftrace_event_is_function(call)) { /* * Perf does things differently with function events. * It only allows an "ip" field, and expects a string. -- cgit v1.2.1 From 7f09d639b8c4959736ff112d0fb42780c37ff6db Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Mon, 19 Dec 2022 13:31:08 -0500 Subject: tracing/selftests: Add test for event filtering on function name With the new filter logic of passing in the name of a function to match an instruction pointer (or the address of the function), add a test to make sure that it is functional. This is also the first test to exercise plain event filtering. The filtering has been tested via the trigger logic, which uses the same code, but there was nothing that tested just the event filter, so this test is the first to add such a case. 
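For reference, the name-to-range resolution that both the filter and this test rely on follows the kallsyms pattern used in parse_pred() above. A simplified sketch, with error handling omitted ("exit_mmap" is simply the function the test below uses):

	/* Sketch: resolve a function name (or raw address) to [start, end). */
	unsigned long ip, size, offset, start, end;

	ip = kallsyms_lookup_name("exit_mmap");	/* returns 0 if not found */
	if (ip && kallsyms_lookup_size_offset(ip, &size, &offset)) {
		start = ip - offset;	/* start of the function */
		end = start + size;	/* one past its last byte */
	}
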
Link: https://lkml.kernel.org/r/20221219183214.075559302@goodmis.org Cc: Andrew Morton Cc: Tom Zanussi Cc: Zheng Yejian Cc: linux-kselftest@vger.kernel.org Suggested-by: Masami Hiramatsu (Google) Reviewed-by: Ross Zwisler Acked-by: Shuah Khan Signed-off-by: Steven Rostedt (Google) --- .../ftrace/test.d/filter/event-filter-function.tc | 58 ++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc diff --git a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc new file mode 100644 index 000000000000..e2ff3bf4df80 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc @@ -0,0 +1,58 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: event filter function - test event filtering on functions +# requires: set_event events/kmem/kmem_cache_free/filter +# flags: instance + +fail() { #msg + echo $1 + exit_fail +} + +echo "Test event filter function name" +echo 0 > tracing_on +echo 0 > events/enable +echo > trace +echo 'call_site.function == exit_mmap' > events/kmem/kmem_cache_free/filter +echo 1 > events/kmem/kmem_cache_free/enable +echo 1 > tracing_on +ls > /dev/null +echo 0 > events/kmem/kmem_cache_free/enable + +hitcnt=`grep kmem_cache_free trace| grep exit_mmap | wc -l` +misscnt=`grep kmem_cache_free trace| grep -v exit_mmap | wc -l` + +if [ $hitcnt -eq 0 ]; then + exit_fail +fi + +if [ $misscnt -gt 0 ]; then + exit_fail +fi + +address=`grep ' exit_mmap$' /proc/kallsyms | cut -d' ' -f1` + +echo "Test event filter function address" +echo 0 > tracing_on +echo 0 > events/enable +echo > trace +echo "call_site.function == 0x$address" > events/kmem/kmem_cache_free/filter +echo 1 > events/kmem/kmem_cache_free/enable +echo 1 > tracing_on +sleep 1 +echo 0 > events/kmem/kmem_cache_free/enable + +hitcnt=`grep kmem_cache_free trace| grep exit_mmap | wc -l` +misscnt=`grep kmem_cache_free trace| grep -v exit_mmap | wc -l` + +if [ $hitcnt -eq 0 ]; then + exit_fail +fi + +if [ $misscnt -gt 0 ]; then + exit_fail +fi + +reset_events_filter + +exit 0 -- cgit v1.2.1 From b56c68f705cad0cff61fbe132c66ced2c737c65c Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 3 Jan 2023 12:49:12 +0000 Subject: ftrace: Add sample with custom ops When reworking core ftrace code or architectural ftrace code, it's often necessary to test/analyse/benchmark a number of ftrace_ops configurations. This patch adds a module which can be used to explore some of those configurations. I'm using this to benchmark various options for changing the way trampolines and handling of ftrace_ops work on arm64, and ensuring other architectures aren't adversely affected. 
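Every configuration the module explores boils down to the same two-step ftrace_ops setup that ops_alloc_init() below performs once per ops; a minimal sketch of that pattern (names are illustrative):

	/* Sketch: attach one ftrace_ops to a single function and register it. */
	static void my_tracer(unsigned long ip, unsigned long parent_ip,
			      struct ftrace_ops *op, struct ftrace_regs *fregs)
	{
		/* tracer body; the benchmark's nop tracer does nothing here */
	}

	static struct ftrace_ops my_ops = {
		.func = my_tracer,
		.flags = 0,	/* e.g. FTRACE_OPS_FL_SAVE_REGS */
	};

	static int __init my_sample_init(void)
	{
		/* filter to one call site, then register */
		WARN_ON_ONCE(ftrace_set_filter_ip(&my_ops, (unsigned long)tracee_relevant, 0, 0));
		WARN_ON_ONCE(register_ftrace_function(&my_ops));
		return 0;
	}
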
For example, in a QEMU+KVM VM running on a 2GHz Xeon E5-2660 workstation, loading the module in various configurations produces: | # insmod ftrace-ops.ko | ftrace_ops: registering: | relevant ops: 1 | tracee: tracee_relevant [ftrace_ops] | tracer: ops_func_nop [ftrace_ops] | irrelevant ops: 0 | tracee: tracee_irrelevant [ftrace_ops] | tracer: ops_func_nop [ftrace_ops] | saving registers: NO | assist recursion: NO | assist RCU: NO | ftrace_ops: Attempted 100000 calls to tracee_relevant [ftrace_ops] in 1681558ns (16ns / call) | # insmod ftrace-ops.ko nr_ops_irrelevant=5 | ftrace_ops: registering: | relevant ops: 1 | tracee: tracee_relevant [ftrace_ops] | tracer: ops_func_nop [ftrace_ops] | irrelevant ops: 5 | tracee: tracee_irrelevant [ftrace_ops] | tracer: ops_func_nop [ftrace_ops] | saving registers: NO | assist recursion: NO | assist RCU: NO | ftrace_ops: Attempted 100000 calls to tracee_relevant [ftrace_ops] in 1693042ns (16ns / call) | # insmod ftrace-ops.ko nr_ops_relevant=2 | ftrace_ops: registering: | relevant ops: 2 | tracee: tracee_relevant [ftrace_ops] | tracer: ops_func_nop [ftrace_ops] | irrelevant ops: 0 | tracee: tracee_irrelevant [ftrace_ops] | tracer: ops_func_nop [ftrace_ops] | saving registers: NO | assist recursion: NO | assist RCU: NO | ftrace_ops: Attempted 100000 calls to tracee_relevant [ftrace_ops] in 11965582ns (119ns / call) | # insmod ftrace-ops.ko save_regs=true | ftrace_ops: registering: | relevant ops: 1 | tracee: tracee_relevant [ftrace_ops] | tracer: ops_func_nop [ftrace_ops] | irrelevant ops: 0 | tracee: tracee_irrelevant [ftrace_ops] | tracer: ops_func_nop [ftrace_ops] | saving registers: YES | assist recursion: NO | assist RCU: NO | ftrace_ops: Attempted 100000 calls to tracee_relevant [ftrace_ops] in 4459624ns (44ns / call) Link: https://lkml.kernel.org/r/20230103124912.2948963-4-mark.rutland@arm.com Cc: Florent Revest Acked-by: Masami Hiramatsu (Google) Signed-off-by: Mark Rutland Signed-off-by: Steven Rostedt (Google) --- samples/Kconfig | 7 ++ samples/Makefile | 1 + samples/ftrace/Makefile | 1 + samples/ftrace/ftrace-ops.c | 252 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 261 insertions(+) create mode 100644 samples/ftrace/ftrace-ops.c diff --git a/samples/Kconfig b/samples/Kconfig index 0d81c00289ee..daf14c35f071 100644 --- a/samples/Kconfig +++ b/samples/Kconfig @@ -46,6 +46,13 @@ config SAMPLE_FTRACE_DIRECT_MULTI that hooks to wake_up_process and schedule, and prints the function addresses. +config SAMPLE_FTRACE_OPS + tristate "Build custom ftrace ops example" + depends on FUNCTION_TRACER + help + This builds an ftrace ops example that hooks two functions and + measures the time taken to invoke one function a number of times. 
+ config SAMPLE_TRACE_ARRAY tristate "Build sample module for kernel access to Ftrace instances" depends on EVENT_TRACING && m diff --git a/samples/Makefile b/samples/Makefile index 9832ef3f8fcb..7cb632ef88ee 100644 --- a/samples/Makefile +++ b/samples/Makefile @@ -24,6 +24,7 @@ obj-$(CONFIG_SAMPLE_TRACE_CUSTOM_EVENTS) += trace_events/ obj-$(CONFIG_SAMPLE_TRACE_PRINTK) += trace_printk/ obj-$(CONFIG_SAMPLE_FTRACE_DIRECT) += ftrace/ obj-$(CONFIG_SAMPLE_FTRACE_DIRECT_MULTI) += ftrace/ +obj-$(CONFIG_SAMPLE_FTRACE_OPS) += ftrace/ obj-$(CONFIG_SAMPLE_TRACE_ARRAY) += ftrace/ subdir-$(CONFIG_SAMPLE_UHID) += uhid obj-$(CONFIG_VIDEO_PCI_SKELETON) += v4l/ diff --git a/samples/ftrace/Makefile b/samples/ftrace/Makefile index faf8cdb79c5f..589baf2ec4e3 100644 --- a/samples/ftrace/Makefile +++ b/samples/ftrace/Makefile @@ -5,6 +5,7 @@ obj-$(CONFIG_SAMPLE_FTRACE_DIRECT) += ftrace-direct-too.o obj-$(CONFIG_SAMPLE_FTRACE_DIRECT) += ftrace-direct-modify.o obj-$(CONFIG_SAMPLE_FTRACE_DIRECT_MULTI) += ftrace-direct-multi.o obj-$(CONFIG_SAMPLE_FTRACE_DIRECT_MULTI) += ftrace-direct-multi-modify.o +obj-$(CONFIG_SAMPLE_FTRACE_OPS) += ftrace-ops.o CFLAGS_sample-trace-array.o := -I$(src) obj-$(CONFIG_SAMPLE_TRACE_ARRAY) += sample-trace-array.o diff --git a/samples/ftrace/ftrace-ops.c b/samples/ftrace/ftrace-ops.c new file mode 100644 index 000000000000..24deb51c7261 --- /dev/null +++ b/samples/ftrace/ftrace-ops.c @@ -0,0 +1,252 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/ftrace.h> + +#include <asm/barrier.h> + +/* + * Arbitrary value chosen to be sufficiently large to minimize noise but + * sufficiently small to complete quickly. + */ +unsigned int nr_function_calls = 100000; +module_param(nr_function_calls, uint, 0); +MODULE_PARM_DESC(nr_function_calls, "How many times to call the relevant tracee"); + +/* + * The number of ops associated with a call site affects whether a tracer can + * be called directly or whether it's necessary to go via the list func, which + * can be significantly more expensive. + */ +unsigned int nr_ops_relevant = 1; +module_param(nr_ops_relevant, uint, 0); +MODULE_PARM_DESC(nr_ops_relevant, "How many ftrace_ops to associate with the relevant tracee"); + +/* + * On architectures where all call sites share the same trampoline, having + * tracers enabled for distinct functions can force the use of the list func + * and incur overhead for all call sites. + */ +unsigned int nr_ops_irrelevant = 0; +module_param(nr_ops_irrelevant, uint, 0); +MODULE_PARM_DESC(nr_ops_irrelevant, "How many ftrace_ops to associate with the irrelevant tracee"); + +/* + * On architectures with DYNAMIC_FTRACE_WITH_REGS, saving the full pt_regs can + * be more expensive than only saving the minimal necessary regs. + */ +bool save_regs = false; +module_param(save_regs, bool, 0); +MODULE_PARM_DESC(save_regs, "Register ops with FTRACE_OPS_FL_SAVE_REGS (save all registers in the trampoline)"); + +bool assist_recursion = false; +module_param(assist_recursion, bool, 0); +MODULE_PARM_DESC(assist_recursion, "Register ops with FTRACE_OPS_FL_RECURSION"); + +bool assist_rcu = false; +module_param(assist_rcu, bool, 0); +MODULE_PARM_DESC(assist_rcu, "Register ops with FTRACE_OPS_FL_RCU"); + +/* + * By default, a trivial tracer is used which immediately returns to minimize + * overhead. Sometimes a consistency check using a more expensive tracer is + * desirable. 
+ */ +bool check_count = false; +module_param(check_count, bool, 0); +MODULE_PARM_DESC(check_count, "Check that tracers are called the expected number of times"); + +/* + * Usually it's not interesting to leave the ops registered after the test + * runs, but sometimes it can be useful to leave them registered so that they + * can be inspected through the tracefs 'enabled_functions' file. + */ +bool persist = false; +module_param(persist, bool, 0); +MODULE_PARM_DESC(persist, "Successfully load module and leave ftrace ops registered after test completes"); + +/* + * Marked as noinline to ensure that an out-of-line traceable copy is + * generated by the compiler. + * + * The barrier() ensures the compiler won't elide calls by determining there + * are no side-effects. + */ +static noinline void tracee_relevant(void) +{ + barrier(); +} + +/* + * Marked as noinline to ensure that an out-of-line traceable copy is + * generated by the compiler. + * + * The barrier() ensures the compiler won't elide calls by determining there + * are no side-effects. + */ +static noinline void tracee_irrelevant(void) +{ + barrier(); +} + +struct sample_ops { + struct ftrace_ops ops; + unsigned int count; +}; + +static void ops_func_nop(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, + struct ftrace_regs *fregs) +{ + /* do nothing */ +} + +static void ops_func_count(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, + struct ftrace_regs *fregs) +{ + struct sample_ops *self; + + self = container_of(op, struct sample_ops, ops); + self->count++; +} + +struct sample_ops *ops_relevant; +struct sample_ops *ops_irrelevant; + +static struct sample_ops *ops_alloc_init(void *tracee, ftrace_func_t func, + unsigned long flags, int nr) +{ + struct sample_ops *ops; + + ops = kcalloc(nr, sizeof(*ops), GFP_KERNEL); + if (WARN_ON_ONCE(!ops)) + return NULL; + + for (unsigned int i = 0; i < nr; i++) { + ops[i].ops.func = func; + ops[i].ops.flags = flags; + WARN_ON_ONCE(ftrace_set_filter_ip(&ops[i].ops, (unsigned long)tracee, 0, 0)); + WARN_ON_ONCE(register_ftrace_function(&ops[i].ops)); + } + + return ops; +} + +static void ops_destroy(struct sample_ops *ops, int nr) +{ + if (!ops) + return; + + for (unsigned int i = 0; i < nr; i++) { + WARN_ON_ONCE(unregister_ftrace_function(&ops[i].ops)); + ftrace_free_filter(&ops[i].ops); + } + + kfree(ops); } + +static void ops_check(struct sample_ops *ops, int nr, + unsigned int expected_count) +{ + if (!ops || !check_count) + return; + + for (unsigned int i = 0; i < nr; i++) { + if (ops[i].count == expected_count) + continue; + pr_warn("Counter called %u times (expected %u)\n", + ops[i].count, expected_count); + } +} + +ftrace_func_t tracer_relevant = ops_func_nop; +ftrace_func_t tracer_irrelevant = ops_func_nop; + +static int __init ftrace_ops_sample_init(void) +{ + unsigned long flags = 0; + ktime_t start, end; + u64 period; + + if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) && save_regs) { + pr_info("this kernel does not support saving registers\n"); + save_regs = false; + } else if (save_regs) { + flags |= FTRACE_OPS_FL_SAVE_REGS; + } + + if (assist_recursion) + flags |= FTRACE_OPS_FL_RECURSION; + + if (assist_rcu) + flags |= FTRACE_OPS_FL_RCU; + + if (check_count) { + tracer_relevant = ops_func_count; + tracer_irrelevant = ops_func_count; + } + + pr_info("registering:\n" + " relevant ops: %u\n" + " tracee: %ps\n" + " tracer: %ps\n" + " irrelevant ops: %u\n" + " tracee: %ps\n" + " tracer: %ps\n" + " saving registers: %s\n" + " assist recursion: %s\n" 
+ " assist RCU: %s\n", + nr_ops_relevant, tracee_relevant, tracer_relevant, + nr_ops_irrelevant, tracee_irrelevant, tracer_irrelevant, + save_regs ? "YES" : "NO", + assist_recursion ? "YES" : "NO", + assist_rcu ? "YES" : "NO"); + + ops_relevant = ops_alloc_init(tracee_relevant, tracer_relevant, + flags, nr_ops_relevant); + ops_irrelevant = ops_alloc_init(tracee_irrelevant, tracer_irrelevant, + flags, nr_ops_irrelevant); + + start = ktime_get(); + for (unsigned int i = 0; i < nr_function_calls; i++) + tracee_relevant(); + end = ktime_get(); + + ops_check(ops_relevant, nr_ops_relevant, nr_function_calls); + ops_check(ops_irrelevant, nr_ops_irrelevant, 0); + + period = ktime_to_ns(ktime_sub(end, start)); + + pr_info("Attempted %u calls to %ps in %lluns (%lluns / call)\n", + nr_function_calls, tracee_relevant, + period, period / nr_function_calls); + + if (persist) + return 0; + + ops_destroy(ops_relevant, nr_ops_relevant); + ops_destroy(ops_irrelevant, nr_ops_irrelevant); + + /* + * The benchmark completed sucessfully, but there's no reason to keep + * the module around. Return an error do the user doesn't have to + * manually unload the module. + */ + return -EINVAL; +} +module_init(ftrace_ops_sample_init); + +static void __exit ftrace_ops_sample_exit(void) +{ + ops_destroy(ops_relevant, nr_ops_relevant); + ops_destroy(ops_irrelevant, nr_ops_irrelevant); +} +module_exit(ftrace_ops_sample_exit); + +MODULE_AUTHOR("Mark Rutland"); +MODULE_DESCRIPTION("Example of using custom ftrace_ops"); +MODULE_LICENSE("GPL"); -- cgit v1.2.1 From 3e4272b9954094907f16861199728f14002fcaf6 Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Fri, 13 Jan 2023 20:55:01 +0800 Subject: tracing: Add NULL checks for buffer in ring_buffer_free_read_page() In a previous commit 7433632c9ff6, buffer, buffer->buffers and buffer->buffers[cpu] in ring_buffer_wake_waiters() can be NULL, and thus the related checks are added. However, in the same call stack, these variables are also used in ring_buffer_free_read_page(): tracing_buffers_release() ring_buffer_wake_waiters(iter->array_buffer->buffer) cpu_buffer = buffer->buffers[cpu] -> Add checks by previous commit ring_buffer_free_read_page(iter->array_buffer->buffer) cpu_buffer = buffer->buffers[cpu] -> No check Thus, to avod possible null-pointer derefernces, the related checks should be added. These results are reported by a static tool designed by myself. 
Link: https://lkml.kernel.org/r/20230113125501.760324-1-baijiaju1990@gmail.com Reported-by: TOTE Robot Signed-off-by: Jia-Ju Bai Signed-off-by: Steven Rostedt (Google) --- kernel/trace/ring_buffer.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index c366a0a9ddba..45d4a23d6044 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -5626,11 +5626,16 @@ EXPORT_SYMBOL_GPL(ring_buffer_alloc_read_page); */ void ring_buffer_free_read_page(struct trace_buffer *buffer, int cpu, void *data) { - struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu]; + struct ring_buffer_per_cpu *cpu_buffer; struct buffer_data_page *bpage = data; struct page *page = virt_to_page(bpage); unsigned long flags; + if (!buffer || !buffer->buffers || !buffer->buffers[cpu]) + return; + + cpu_buffer = buffer->buffers[cpu]; + /* If the page is still in use someplace else, we can't reuse it */ if (page_ref_count(page) > 1) goto out; -- cgit v1.2.1 From 19ff80496450f58a148effa2551350f3b89e4990 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 17 Jan 2023 10:21:26 -0500 Subject: tracing: Simplify calculating entry size using struct_size() When tracing a dynamic string field for a synthetic event, the offset calculation for where to write the next event can use struct_size() to find the current size of the structure. This simplifies the code and makes it less error prone (struct_size(entry, fields, event->n_u64) computes the size of the structure including event->n_u64 elements of its flexible "fields" array, with overflow checking). Link: https://lkml.kernel.org/r/20230117152235.698632147@goodmis.org Cc: Masami Hiramatsu Cc: Andrew Morton Cc: Tom Zanussi Cc: Ross Zwisler Cc: Ching-lin Yu Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_synth.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index 67592eed0be8..9f79cd689b79 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -416,8 +416,7 @@ static unsigned int trace_string(struct synth_trace_event *entry, if (is_dynamic) { u32 data_offset; - data_offset = offsetof(typeof(*entry), fields); - data_offset += event->n_u64 * sizeof(u64); + data_offset = struct_size(entry, fields, event->n_u64); data_offset += data_size; len = kern_fetch_store_strlen((unsigned long)str_val); -- cgit v1.2.1 From 288709c9f3b0236000754824bcabc9a9ffaa3738 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 17 Jan 2023 10:21:27 -0500 Subject: tracing: Allow stacktraces to be saved as histogram variables Allow saving stacktraces into a histogram variable. This will be used by synthetic events to allow a stacktrace from one event to be passed and displayed by another event. The special keyword "stacktrace" is to be used to trigger a stack trace for the event that the histogram trigger is attached to. echo 'hist:keys=pid:st=stacktrace' > events/sched/sched_waking/trigger Currently nothing can get access to the "$st" variable above that contains the stack trace, but that will soon change. 
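The mechanics, visible in the hist diff that follows, reuse the per-element string storage: the trace is captured with stack_trace_save() and zero-terminated when it does not fill the buffer. A condensed sketch (constants as defined in the patch):

	/* Sketch: capture a stacktrace into a variable's string storage. */
	unsigned long stack[HIST_STACKTRACE_DEPTH];
	int e;

	e = stack_trace_save(stack, HIST_STACKTRACE_DEPTH, HIST_STACKTRACE_SKIP);
	if (e < HIST_STACKTRACE_DEPTH - 1)
		stack[e] = 0;	/* zero-terminate so readers can find the end */
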
Link: https://lkml.kernel.org/r/20230117152235.856323729@goodmis.org Cc: Masami Hiramatsu Cc: Andrew Morton Cc: Tom Zanussi Cc: Ross Zwisler Cc: Ching-lin Yu Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_hist.c | 52 ++++++++++++++++++++++++++++++++-------- 1 file changed, 42 insertions(+), 10 deletions(-) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 5edbf6b1da3f..f8f67e17898a 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -1360,6 +1360,8 @@ static const char *hist_field_name(struct hist_field *field, field_name = field->name; } else if (field->flags & HIST_FIELD_FL_TIMESTAMP) field_name = "common_timestamp"; + else if (field->flags & HIST_FIELD_FL_STACKTRACE) + field_name = "stacktrace"; else if (field->flags & HIST_FIELD_FL_HITCOUNT) field_name = "hitcount"; @@ -1980,6 +1982,10 @@ static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data, if (flags & HIST_FIELD_FL_STACKTRACE) { hist_field->fn_num = HIST_FIELD_FN_NOP; + hist_field->size = HIST_STACKTRACE_SIZE; + hist_field->type = kstrdup_const("unsigned long[]", GFP_KERNEL); + if (!hist_field->type) + goto free; goto out; } @@ -2351,6 +2357,8 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file, hist_data->enable_timestamps = true; if (*flags & HIST_FIELD_FL_TIMESTAMP_USECS) hist_data->attrs->ts_in_usecs = true; + } else if (strcmp(field_name, "stacktrace") == 0) { + *flags |= HIST_FIELD_FL_STACKTRACE; } else if (strcmp(field_name, "common_cpu") == 0) *flags |= HIST_FIELD_FL_CPU; else if (strcmp(field_name, "hitcount") == 0) @@ -3119,13 +3127,24 @@ static inline void __update_field_vars(struct tracing_map_elt *elt, var_val = hist_fn_call(val, elt, buffer, rbe, rec); var_idx = var->var.idx; - if (val->flags & HIST_FIELD_FL_STRING) { + if (val->flags & (HIST_FIELD_FL_STRING | + HIST_FIELD_FL_STACKTRACE)) { char *str = elt_data->field_var_str[j++]; char *val_str = (char *)(uintptr_t)var_val; unsigned int size; - size = min(val->size, STR_VAR_LEN_MAX); - strscpy(str, val_str, size); + if (val->flags & HIST_FIELD_FL_STRING) { + size = min(val->size, STR_VAR_LEN_MAX); + strscpy(str, val_str, size); + } else { + int e; + + e = stack_trace_save((void *)str, + HIST_STACKTRACE_DEPTH, + HIST_STACKTRACE_SKIP); + if (e < HIST_STACKTRACE_DEPTH - 1) + ((unsigned long *)str)[e] = 0; + } var_val = (u64)(uintptr_t)str; } tracing_map_set_var(elt, var_idx, var_val); @@ -3824,7 +3843,8 @@ static void save_field_var(struct hist_trigger_data *hist_data, { hist_data->field_vars[hist_data->n_field_vars++] = field_var; - if (field_var->val->flags & HIST_FIELD_FL_STRING) + /* Stack traces are saved in the string storage too */ + if (field_var->val->flags & (HIST_FIELD_FL_STRING | HIST_FIELD_FL_STACKTRACE)) hist_data->n_field_var_str++; } @@ -4103,7 +4123,8 @@ static int action_create(struct hist_trigger_data *hist_data, } hist_data->save_vars[hist_data->n_save_vars++] = field_var; - if (field_var->val->flags & HIST_FIELD_FL_STRING) + if (field_var->val->flags & + (HIST_FIELD_FL_STRING | HIST_FIELD_FL_STACKTRACE)) hist_data->n_save_var_str++; kfree(param); } @@ -4351,7 +4372,8 @@ static int create_var_field(struct hist_trigger_data *hist_data, if (!ret && hist_data->fields[val_idx]->flags & HIST_FIELD_FL_EXECNAME) update_var_execname(hist_data->fields[val_idx]); - if (!ret && hist_data->fields[val_idx]->flags & HIST_FIELD_FL_STRING) + if (!ret && hist_data->fields[val_idx]->flags & + (HIST_FIELD_FL_STRING | 
HIST_FIELD_FL_STACKTRACE)) hist_data->fields[val_idx]->var_str_idx = hist_data->n_var_str++; return ret; @@ -5092,7 +5114,8 @@ static void hist_trigger_elt_update(struct hist_trigger_data *hist_data, if (hist_field->flags & HIST_FIELD_FL_VAR) { var_idx = hist_field->var.idx; - if (hist_field->flags & HIST_FIELD_FL_STRING) { + if (hist_field->flags & + (HIST_FIELD_FL_STRING | HIST_FIELD_FL_STACKTRACE)) { unsigned int str_start, var_str_idx, idx; char *str, *val_str; unsigned int size; @@ -5105,9 +5128,18 @@ static void hist_trigger_elt_update(struct hist_trigger_data *hist_data, str = elt_data->field_var_str[idx]; val_str = (char *)(uintptr_t)hist_val; - size = min(hist_field->size, STR_VAR_LEN_MAX); - strscpy(str, val_str, size); - + if (hist_field->flags & HIST_FIELD_FL_STRING) { + size = min(hist_field->size, STR_VAR_LEN_MAX); + strscpy(str, val_str, size); + } else { + int e; + + e = stack_trace_save((void *)str, + HIST_STACKTRACE_DEPTH, + HIST_STACKTRACE_SKIP); + if (e < HIST_STACKTRACE_DEPTH - 1) + ((unsigned long *)str)[e] = 0; + } hist_val = (u64)(uintptr_t)str; } tracing_map_set_var(elt, var_idx, hist_val); -- cgit v1.2.1 From 00cf3d672a9dd409418647e9f98784c339c3ff63 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 17 Jan 2023 10:21:28 -0500 Subject: tracing: Allow synthetic events to pass around stacktraces Allow a stacktrace from one event to be displayed by the end event of a synthetic event. This is very useful when looking for the longest latency of a sleep or something blocked on I/O. # cd /sys/kernel/tracing/ # echo 's:block_lat pid_t pid; u64 delta; unsigned long[] stack;' > dynamic_events # echo 'hist:keys=next_pid:ts=common_timestamp.usecs,st=stacktrace if prev_state == 1||prev_state == 2' > events/sched/sched_switch/trigger # echo 'hist:keys=prev_pid:delta=common_timestamp.usecs-$ts,s=$st:onmax($delta).trace(block_lat,prev_pid,$delta,$s)' >> events/sched/sched_switch/trigger The above creates a "block_lat" synthetic event that take the stacktrace of when a task schedules out in either the interruptible or uninterruptible states, and on a new per process max $delta (the time it was scheduled out), will print the process id and the stacktrace. # echo 1 > events/synthetic/block_lat/enable # cat trace # TASK-PID CPU# ||||| TIMESTAMP FUNCTION # | | | ||||| | | kworker/u16:0-767 [006] d..4. 560.645045: block_lat: pid=767 delta=66 stack=STACK: => __schedule => schedule => pipe_read => vfs_read => ksys_read => do_syscall_64 => 0x966000aa -0 [003] d..4. 561.132117: block_lat: pid=0 delta=413787 stack=STACK: => __schedule => schedule => schedule_hrtimeout_range_clock => do_sys_poll => __x64_sys_poll => do_syscall_64 => 0x966000aa <...>-153 [006] d..4. 
562.068407: block_lat: pid=153 delta=54 stack=STACK: => __schedule => schedule => io_schedule => rq_qos_wait => wbt_wait => __rq_qos_throttle => blk_mq_submit_bio => submit_bio_noacct_nocheck => ext4_bio_write_page => mpage_submit_page => mpage_process_page_bufs => mpage_prepare_extent_to_map => ext4_do_writepages => ext4_writepages => do_writepages => __writeback_single_inode Link: https://lkml.kernel.org/r/20230117152236.010941267@goodmis.org Cc: Masami Hiramatsu Cc: Andrew Morton Cc: Tom Zanussi Cc: Ross Zwisler Cc: Ching-lin Yu Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.h | 4 ++ kernel/trace/trace_events_hist.c | 7 ++-- kernel/trace/trace_events_synth.c | 80 ++++++++++++++++++++++++++++++++++++++- kernel/trace/trace_synth.h | 1 + 4 files changed, 87 insertions(+), 5 deletions(-) diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 4eb6d6b97a9f..d16929dd0f08 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -113,6 +113,10 @@ enum trace_type { #define MEM_FAIL(condition, fmt, ...) \ DO_ONCE_LITE_IF(condition, pr_err, "ERROR: " fmt, ##__VA_ARGS__) +#define HIST_STACKTRACE_DEPTH 16 +#define HIST_STACKTRACE_SIZE (HIST_STACKTRACE_DEPTH * sizeof(unsigned long)) +#define HIST_STACKTRACE_SKIP 5 + /* * syscalls are special, and need special handling, this is why * they are not included in trace_entries.h diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index f8f67e17898a..2c0837ff2299 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -480,10 +480,6 @@ DEFINE_HIST_FIELD_FN(u8); #define for_each_hist_key_field(i, hist_data) \ for ((i) = (hist_data)->n_vals; (i) < (hist_data)->n_fields; (i)++) -#define HIST_STACKTRACE_DEPTH 16 -#define HIST_STACKTRACE_SIZE (HIST_STACKTRACE_DEPTH * sizeof(unsigned long)) -#define HIST_STACKTRACE_SKIP 5 - #define HITCOUNT_IDX 0 #define HIST_KEY_SIZE_MAX (MAX_FILTER_STR_VAL + HIST_STACKTRACE_SIZE) @@ -3869,6 +3865,9 @@ static int check_synth_field(struct synth_event *event, && field->is_dynamic) return 0; + if (strstr(hist_field->type, "long[") && field->is_stack) + return 0; + if (strcmp(field->type, hist_field->type) != 0) { if (field->size != hist_field->size || (!field->is_string && field->is_signed != hist_field->is_signed)) diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index 9f79cd689b79..adb630633f31 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -173,6 +173,14 @@ static int synth_field_is_string(char *type) return false; } +static int synth_field_is_stack(char *type) +{ + if (strstr(type, "long[") != NULL) + return true; + + return false; +} + static int synth_field_string_size(char *type) { char buf[4], *end, *start; @@ -248,6 +256,8 @@ static int synth_field_size(char *type) size = sizeof(gfp_t); else if (synth_field_is_string(type)) size = synth_field_string_size(type); + else if (synth_field_is_stack(type)) + size = 0; return size; } @@ -292,6 +302,8 @@ static const char *synth_field_fmt(char *type) fmt = "%x"; else if (synth_field_is_string(type)) fmt = "%.*s"; + else if (synth_field_is_stack(type)) + fmt = "%s"; return fmt; } @@ -371,6 +383,23 @@ static enum print_line_t print_synth_event(struct trace_iterator *iter, i == se->n_fields - 1 ? 
"" : " "); n_u64 += STR_VAR_LEN_MAX / sizeof(u64); } + } else if (se->fields[i]->is_stack) { + u32 offset, data_offset, len; + unsigned long *p, *end; + + offset = (u32)entry->fields[n_u64]; + data_offset = offset & 0xffff; + len = offset >> 16; + + p = (void *)entry + data_offset; + end = (void *)p + len - (sizeof(long) - 1); + + trace_seq_printf(s, "%s=STACK:\n", se->fields[i]->name); + + for (; *p && p < end; p++) + trace_seq_printf(s, "=> %pS\n", (void *)*p); + n_u64++; + } else { struct trace_print_flags __flags[] = { __def_gfpflag_names, {-1, NULL} }; @@ -446,6 +475,43 @@ static unsigned int trace_string(struct synth_trace_event *entry, return len; } +static unsigned int trace_stack(struct synth_trace_event *entry, + struct synth_event *event, + long *stack, + unsigned int data_size, + unsigned int *n_u64) +{ + unsigned int len; + u32 data_offset; + void *data_loc; + + data_offset = struct_size(entry, fields, event->n_u64); + data_offset += data_size; + + for (len = 0; len < HIST_STACKTRACE_DEPTH; len++) { + if (!stack[len]) + break; + } + + /* Include the zero'd element if it fits */ + if (len < HIST_STACKTRACE_DEPTH) + len++; + + len *= sizeof(long); + + /* Find the dynamic section to copy the stack into. */ + data_loc = (void *)entry + data_offset; + memcpy(data_loc, stack, len); + + /* Fill in the field that holds the offset/len combo */ + data_offset |= len << 16; + *(u32 *)&entry->fields[*n_u64] = data_offset; + + (*n_u64)++; + + return len; +} + static notrace void trace_event_raw_event_synth(void *__data, u64 *var_ref_vals, unsigned int *var_ref_idx) @@ -498,6 +564,12 @@ static notrace void trace_event_raw_event_synth(void *__data, event->fields[i]->is_dynamic, data_size, &n_u64); data_size += len; /* only dynamic string increments */ + } if (event->fields[i]->is_stack) { + long *stack = (long *)(long)var_ref_vals[val_idx]; + + len = trace_stack(entry, event, stack, + data_size, &n_u64); + data_size += len; } else { struct synth_field *field = event->fields[i]; u64 val = var_ref_vals[val_idx]; @@ -560,6 +632,9 @@ static int __set_synth_event_print_fmt(struct synth_event *event, event->fields[i]->is_dynamic) pos += snprintf(buf + pos, LEN_OR_ZERO, ", __get_str(%s)", event->fields[i]->name); + else if (event->fields[i]->is_stack) + pos += snprintf(buf + pos, LEN_OR_ZERO, + ", __get_stacktrace(%s)", event->fields[i]->name); else pos += snprintf(buf + pos, LEN_OR_ZERO, ", REC->%s", event->fields[i]->name); @@ -696,7 +771,8 @@ static struct synth_field *parse_synth_field(int argc, char **argv, ret = -EINVAL; goto free; } else if (size == 0) { - if (synth_field_is_string(field->type)) { + if (synth_field_is_string(field->type) || + synth_field_is_stack(field->type)) { char *type; len = sizeof("__data_loc ") + strlen(field->type) + 1; @@ -727,6 +803,8 @@ static struct synth_field *parse_synth_field(int argc, char **argv, if (synth_field_is_string(field->type)) field->is_string = true; + else if (synth_field_is_stack(field->type)) + field->is_stack = true; field->is_signed = synth_field_signed(field->type); out: diff --git a/kernel/trace/trace_synth.h b/kernel/trace/trace_synth.h index b29595fe3ac5..43f6fb6078db 100644 --- a/kernel/trace/trace_synth.h +++ b/kernel/trace/trace_synth.h @@ -18,6 +18,7 @@ struct synth_field { bool is_signed; bool is_string; bool is_dynamic; + bool is_stack; }; struct synth_event { -- cgit v1.2.1 From cc5fc8bfc961eeb99b7e8dffbeff7a3f6995d314 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 17 Jan 2023 10:21:29 -0500 Subject: 
tracing/histogram: Add stacktrace type Now that stacktraces can be part of synthetic events, allow a key to be typed as a stacktrace. # cd /sys/kernel/tracing # echo 's:block_lat u64 delta; unsigned long stack[];' >> dynamic_events # echo 'hist:keys=next_pid:ts=common_timestamp.usecs,st=stacktrace if prev_state == 2' >> events/sched/sched_switch/trigger # echo 'hist:keys=prev_pid:delta=common_timestamp.usecs-$ts,st2=$st:onmatch(sched.sched_switch).trace(block_lat,$delta,$st2)' >> events/sched/sched_switch/trigger # echo 'hist:keys=delta.buckets=100,stack.stacktrace:sort=delta' > events/synthetic/block_lat/trigger # cat events/synthetic/block_lat/hist # event histogram # # trigger info: hist:keys=delta.buckets=100,stacktrace:vals=hitcount:sort=delta.buckets=100:size=2048 [active] # { delta: ~ 0-99, stacktrace: event_hist_trigger+0x464/0x480 event_triggers_call+0x52/0xe0 trace_event_buffer_commit+0x193/0x250 trace_event_raw_event_sched_switch+0xfc/0x150 __traceiter_sched_switch+0x41/0x60 __schedule+0x448/0x7b0 schedule_idle+0x26/0x40 cpu_startup_entry+0x19/0x20 start_secondary+0xed/0xf0 secondary_startup_64_no_verify+0xe0/0xeb } hitcount: 6 { delta: ~ 0-99, stacktrace: event_hist_trigger+0x464/0x480 event_triggers_call+0x52/0xe0 trace_event_buffer_commit+0x193/0x250 trace_event_raw_event_sched_switch+0xfc/0x150 __traceiter_sched_switch+0x41/0x60 __schedule+0x448/0x7b0 schedule_idle+0x26/0x40 cpu_startup_entry+0x19/0x20 __pfx_kernel_init+0x0/0x10 arch_call_rest_init+0xa/0x24 start_kernel+0x964/0x98d secondary_startup_64_no_verify+0xe0/0xeb } hitcount: 3 { delta: ~ 0-99, stacktrace: event_hist_trigger+0x464/0x480 event_triggers_call+0x52/0xe0 trace_event_buffer_commit+0x193/0x250 trace_event_raw_event_sched_switch+0xfc/0x150 __traceiter_sched_switch+0x41/0x60 __schedule+0x448/0x7b0 schedule+0x5a/0xb0 worker_thread+0xaf/0x380 kthread+0xe9/0x110 ret_from_fork+0x2c/0x50 } hitcount: 1 { delta: ~ 100-199, stacktrace: event_hist_trigger+0x464/0x480 event_triggers_call+0x52/0xe0 trace_event_buffer_commit+0x193/0x250 trace_event_raw_event_sched_switch+0xfc/0x150 __traceiter_sched_switch+0x41/0x60 __schedule+0x448/0x7b0 schedule_idle+0x26/0x40 cpu_startup_entry+0x19/0x20 start_secondary+0xed/0xf0 secondary_startup_64_no_verify+0xe0/0xeb } hitcount: 15 [..] 
{ delta: ~ 8500-8599, stacktrace: event_hist_trigger+0x464/0x480 event_triggers_call+0x52/0xe0 trace_event_buffer_commit+0x193/0x250 trace_event_raw_event_sched_switch+0xfc/0x150 __traceiter_sched_switch+0x41/0x60 __schedule+0x448/0x7b0 schedule_idle+0x26/0x40 cpu_startup_entry+0x19/0x20 start_secondary+0xed/0xf0 secondary_startup_64_no_verify+0xe0/0xeb } hitcount: 1 Totals: Hits: 89 Entries: 11 Dropped: 0 Link: https://lkml.kernel.org/r/20230117152236.167046397@goodmis.org Cc: Masami Hiramatsu Cc: Andrew Morton Cc: Tom Zanussi Cc: Ross Zwisler Cc: Ching-lin Yu Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_hist.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 2c0837ff2299..7f3e6ca6f0fa 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -1716,6 +1716,8 @@ static const char *get_hist_field_flags(struct hist_field *hist_field) flags_str = "percent"; else if (hist_field->flags & HIST_FIELD_FL_GRAPH) flags_str = "graph"; + else if (hist_field->flags & HIST_FIELD_FL_STACKTRACE) + flags_str = "stacktrace"; return flags_str; } @@ -2314,6 +2316,8 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file, *flags |= HIST_FIELD_FL_EXECNAME; else if (strcmp(modifier, "syscall") == 0) *flags |= HIST_FIELD_FL_SYSCALL; + else if (strcmp(modifier, "stacktrace") == 0) + *flags |= HIST_FIELD_FL_STACKTRACE; else if (strcmp(modifier, "log2") == 0) *flags |= HIST_FIELD_FL_LOG2; else if (strcmp(modifier, "usecs") == 0) *flags |= HIST_FIELD_FL_TIMESTAMP_USECS; -- cgit v1.2.1 From 88238513bb26713ef6d4418bdf1af062fe608bcb Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 17 Jan 2023 10:21:30 -0500 Subject: tracing/histogram: Document variable stacktrace Add a little documentation (and a useful example) of how a stacktrace can be used within a histogram variable and synthetic event. Link: https://lkml.kernel.org/r/20230117152236.320181354@goodmis.org Cc: Masami Hiramatsu Cc: Andrew Morton Cc: Tom Zanussi Cc: Ross Zwisler Cc: Ching-lin Yu Signed-off-by: Steven Rostedt (Google) --- Documentation/trace/histogram.rst | 129 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 129 insertions(+) diff --git a/Documentation/trace/histogram.rst b/Documentation/trace/histogram.rst index f95459aa984f..5c391328b9bb 100644 --- a/Documentation/trace/histogram.rst +++ b/Documentation/trace/histogram.rst @@ -81,6 +81,7 @@ Documentation written by Tom Zanussi .usecs display a common_timestamp in microseconds .percent display a number of percentage value .graph display a bar-graph of a value + .stacktrace display as a stacktrace (must be a long[] type) ============= ================================================= Note that in general the semantics of a given field aren't @@ -1786,6 +1787,8 @@ or assigned to a variable and referenced in a subsequent expression:: # echo 'hist:keys=next_pid:us_per_sec=1000000 ...' >> event/trigger # echo 'hist:keys=next_pid:timestamp_secs=common_timestamp/$us_per_sec ...' >> event/trigger +Variables can even hold stacktraces, which are useful with synthetic events. + 2.2.2 Synthetic Events ---------------------- @@ -1940,6 +1943,132 @@ the ".buckets" modifier and specify a size (in this case groups of 10). Entries: 16 Dropped: 0 +To save stacktraces, create a synthetic event with a field of type "unsigned long[]" +or even just "long[]". 
For example, to see how long a task is blocked in an +uninterruptible state: + + # cd /sys/kernel/tracing + # echo 's:block_lat pid_t pid; u64 delta; unsigned long[] stack;' > dynamic_events + # echo 'hist:keys=next_pid:ts=common_timestamp.usecs,st=stacktrace if prev_state == 2' >> events/sched/sched_switch/trigger + # echo 'hist:keys=prev_pid:delta=common_timestamp.usecs-$ts,s=$st:onmax($delta).trace(block_lat,prev_pid,$delta,$s)' >> events/sched/sched_switch/trigger + # echo 1 > events/synthetic/block_lat/enable + # cat trace + + # tracer: nop + # + # entries-in-buffer/entries-written: 2/2 #P:8 + # + # _-----=> irqs-off/BH-disabled + # / _----=> need-resched + # | / _---=> hardirq/softirq + # || / _--=> preempt-depth + # ||| / _-=> migrate-disable + # |||| / delay + # TASK-PID CPU# ||||| TIMESTAMP FUNCTION + # | | | ||||| | | + -0 [005] d..4. 521.164922: block_lat: pid=0 delta=8322 stack=STACK: + => __schedule+0x448/0x7b0 + => schedule+0x5a/0xb0 + => io_schedule+0x42/0x70 + => bit_wait_io+0xd/0x60 + => __wait_on_bit+0x4b/0x140 + => out_of_line_wait_on_bit+0x91/0xb0 + => jbd2_journal_commit_transaction+0x1679/0x1a70 + => kjournald2+0xa9/0x280 + => kthread+0xe9/0x110 + => ret_from_fork+0x2c/0x50 + + <...>-2 [004] d..4. 525.184257: block_lat: pid=2 delta=76 stack=STACK: + => __schedule+0x448/0x7b0 + => schedule+0x5a/0xb0 + => schedule_timeout+0x11a/0x150 + => wait_for_completion_killable+0x144/0x1f0 + => __kthread_create_on_node+0xe7/0x1e0 + => kthread_create_on_node+0x51/0x70 + => create_worker+0xcc/0x1a0 + => worker_thread+0x2ad/0x380 + => kthread+0xe9/0x110 + => ret_from_fork+0x2c/0x50 + +A synthetic event that has a stacktrace field may use it as a key in histogram: + + # echo 'hist:delta.buckets=100,stack.stacktrace:sort=delta' > events/synthetic/block_lat/trigger + # cat events/synthetic/block_lat/hist + + # event histogram + # + # trigger info: hist:keys=delta.buckets=100,stacktrace:vals=hitcount:sort=delta.buckets=100:size=2048 [active] + # + + { delta: ~ 0-99, stacktrace: + event_hist_trigger+0x464/0x480 + event_triggers_call+0x52/0xe0 + trace_event_buffer_commit+0x193/0x250 + trace_event_raw_event_sched_switch+0xfc/0x150 + __traceiter_sched_switch+0x41/0x60 + __schedule+0x448/0x7b0 + schedule_idle+0x26/0x40 + cpu_startup_entry+0x19/0x20 + start_secondary+0xed/0xf0 + secondary_startup_64_no_verify+0xe0/0xeb + } hitcount: 6 + { delta: ~ 0-99, stacktrace: + event_hist_trigger+0x464/0x480 + event_triggers_call+0x52/0xe0 + trace_event_buffer_commit+0x193/0x250 + trace_event_raw_event_sched_switch+0xfc/0x150 + __traceiter_sched_switch+0x41/0x60 + __schedule+0x448/0x7b0 + schedule_idle+0x26/0x40 + cpu_startup_entry+0x19/0x20 + __pfx_kernel_init+0x0/0x10 + arch_call_rest_init+0xa/0x24 + start_kernel+0x964/0x98d + secondary_startup_64_no_verify+0xe0/0xeb + } hitcount: 3 + { delta: ~ 0-99, stacktrace: + event_hist_trigger+0x464/0x480 + event_triggers_call+0x52/0xe0 + trace_event_buffer_commit+0x193/0x250 + trace_event_raw_event_sched_switch+0xfc/0x150 + __traceiter_sched_switch+0x41/0x60 + __schedule+0x448/0x7b0 + schedule+0x5a/0xb0 + worker_thread+0xaf/0x380 + kthread+0xe9/0x110 + ret_from_fork+0x2c/0x50 + } hitcount: 1 + { delta: ~ 100-199, stacktrace: + event_hist_trigger+0x464/0x480 + event_triggers_call+0x52/0xe0 + trace_event_buffer_commit+0x193/0x250 + trace_event_raw_event_sched_switch+0xfc/0x150 + __traceiter_sched_switch+0x41/0x60 + __schedule+0x448/0x7b0 + schedule_idle+0x26/0x40 + cpu_startup_entry+0x19/0x20 + start_secondary+0xed/0xf0 + secondary_startup_64_no_verify+0xe0/0xeb 
+ } hitcount: 15 + [..] + { delta: ~ 8500-8599, stacktrace: + event_hist_trigger+0x464/0x480 + event_triggers_call+0x52/0xe0 + trace_event_buffer_commit+0x193/0x250 + trace_event_raw_event_sched_switch+0xfc/0x150 + __traceiter_sched_switch+0x41/0x60 + __schedule+0x448/0x7b0 + schedule_idle+0x26/0x40 + cpu_startup_entry+0x19/0x20 + start_secondary+0xed/0xf0 + secondary_startup_64_no_verify+0xe0/0xeb + } hitcount: 1 + + Totals: + Hits: 89 + Entries: 11 + Dropped: 0 + 2.2.3 Hist trigger 'handlers' and 'actions' ------------------------------------------- -- cgit v1.2.1 From b81a3a100cca1bd63d897efa339d047986615c9f Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 17 Jan 2023 10:21:31 -0500 Subject: tracing/histogram: Add simple tests for stacktrace usage of synthetic events Update the selftests to include a test of passing a stacktrace between the events of a synthetic event. Link: https://lkml.kernel.org/r/20230117152236.475439286@goodmis.org Cc: Masami Hiramatsu Cc: Andrew Morton Cc: Tom Zanussi Cc: Ross Zwisler Cc: Ching-lin Yu Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace.c | 2 +- .../inter-event/trigger-synthetic-event-stack.tc | 24 ++++++++++++++++++++++ .../inter-event/trigger-synthetic-event-syntax.tc | 6 ++++++ 3 files changed, 31 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-stack.tc diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 78ed5f1baa8c..b90eecd27dfc 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -5757,7 +5757,7 @@ static const char readme_msg[] = #ifdef CONFIG_SYNTH_EVENTS " events/synthetic_events\t- Create/append/remove/show synthetic events\n" "\t Write into this file to define/undefine new synthetic events.\n" - "\t example: echo 'myevent u64 lat; char name[]' >> synthetic_events\n" + "\t example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n" #endif #endif ; diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-stack.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-stack.tc new file mode 100644 index 000000000000..755dbe94ccf4 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-stack.tc @@ -0,0 +1,24 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: event trigger - test inter-event histogram trigger trace action with stacktrace param +# requires: set_event synthetic_events events/sched/sched_process_exec/hist "long[]' >> synthetic_events":README + +fail() { #msg + echo $1 + exit_fail +} + +echo "Test create synthetic event with stack" + + +echo 's:wake_lat pid_t pid; u64 delta; unsigned long[] stack;' > dynamic_events +echo 'hist:keys=next_pid:ts=common_timestamp.usecs,st=stacktrace if prev_state == 1||prev_state == 2' >> events/sched/sched_switch/trigger +echo 'hist:keys=prev_pid:delta=common_timestamp.usecs-$ts,s=$st:onmax($delta).trace(wake_lat,prev_pid,$delta,$s)' >> events/sched/sched_switch/trigger +echo 1 > events/synthetic/wake_lat/enable +sleep 1 +if ! 
grep -q "=>.*sched" trace; then + fail "Failed to create synthetic event with stack" +fi + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc index 2968cdc7df30..366f1f3ad906 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc @@ -70,6 +70,12 @@ grep "myevent[[:space:]]unsigned long var" synthetic_events echo "myevent char var[10]" > synthetic_events grep "myevent[[:space:]]char\[10\] var" synthetic_events +if grep -q 'long\[\]' README; then + # test stacktrace type + echo "myevent unsigned long[] var" > synthetic_events + grep "myevent[[:space:]]unsigned long\[\] var" synthetic_events +fi + do_reset exit 0 -- cgit v1.2.1 From 92a22cea4c847b518af646a809cd662d55f9d8ac Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 24 Jan 2023 15:22:39 -0500 Subject: perf/tracing: Use stage6 of tracing to not duplicate macros The perf events are created by the same macro magic as tracefs trace events are. But to hook into perf, it has its own code. It duplicates many of the same macros as the tracefs macros and this is an issue because it misses bug fixes as well as any new enhancements that come with the other trace macros. As the trace macros have been put into their own staging files, have perf take advantage of this and use the tracefs stage 6 macros that the "fast assign" portion of the trace event macro uses. Link: https://lkml.kernel.org/r/20230124202515.716458410@goodmis.org Link: https://lore.kernel.org/lkml/1671181385-5719-1-git-send-email-quic_linyyuan@quicinc.com/ Cc: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo Acked-by: Peter Zijlstra (Intel) Reported-by: Linyu Yuan Signed-off-by: Steven Rostedt (Google) --- include/trace/perf.h | 46 +--------------------------- include/trace/stages/stage6_event_callback.h | 3 ++ 2 files changed, 4 insertions(+), 45 deletions(-) diff --git a/include/trace/perf.h b/include/trace/perf.h index 8f3bf1e17707..2c11181c82e0 100644 --- a/include/trace/perf.h +++ b/include/trace/perf.h @@ -4,51 +4,7 @@ #ifdef CONFIG_PERF_EVENTS -#undef __entry -#define __entry entry - -#undef __get_dynamic_array -#define __get_dynamic_array(field) \ - ((void *)__entry + (__entry->__data_loc_##field & 0xffff)) - -#undef __get_dynamic_array_len -#define __get_dynamic_array_len(field) \ - ((__entry->__data_loc_##field >> 16) & 0xffff) - -#undef __get_str -#define __get_str(field) ((char *)__get_dynamic_array(field)) - -#undef __get_bitmask -#define __get_bitmask(field) (char *)__get_dynamic_array(field) - -#undef __get_cpumask -#define __get_cpumask(field) (char *)__get_dynamic_array(field) - -#undef __get_sockaddr -#define __get_sockaddr(field) ((struct sockaddr *)__get_dynamic_array(field)) - -#undef __get_rel_dynamic_array -#define __get_rel_dynamic_array(field) \ - ((void *)__entry + \ - offsetof(typeof(*__entry), __rel_loc_##field) + \ - sizeof(__entry->__rel_loc_##field) + \ - (__entry->__rel_loc_##field & 0xffff)) - -#undef __get_rel_dynamic_array_len -#define __get_rel_dynamic_array_len(field) \ - ((__entry->__rel_loc_##field >> 16) & 0xffff) - -#undef __get_rel_str -#define __get_rel_str(field) ((char *)__get_rel_dynamic_array(field)) - -#undef __get_rel_bitmask -#define __get_rel_bitmask(field) (char *)__get_rel_dynamic_array(field) - -#undef 
__get_rel_cpumask -#define __get_rel_cpumask(field) (char *)__get_rel_dynamic_array(field) - -#undef __get_rel_sockaddr -#define __get_rel_sockaddr(field) ((struct sockaddr *)__get_rel_dynamic_array(field)) +#include "stages/stage6_event_callback.h" #undef __perf_count #define __perf_count(c) (__count = (c)) diff --git a/include/trace/stages/stage6_event_callback.h b/include/trace/stages/stage6_event_callback.h index 49c32394b53f..919b1a4da980 100644 --- a/include/trace/stages/stage6_event_callback.h +++ b/include/trace/stages/stage6_event_callback.h @@ -2,6 +2,9 @@ /* Stage 6 definitions for creating trace events */ +/* Reuse some of the stage 3 macros */ +#include "stage3_trace_output.h" + #undef __entry #define __entry entry -- cgit v1.2.1 From dc513fd5321d4c15ac1f820c224b2e5220b6e14f Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 24 Jan 2023 15:22:40 -0500 Subject: bpf/tracing: Use stage6 of tracing to not duplicate macros The bpf events are created by the same macro magic as tracefs trace events are. But to hook into bpf, it has its own code. It duplicates many of the same macros as the tracefs macros and this is an issue because it misses bug fixes as well as any new enhancements that come with the other trace macros. As the trace macros have been put into their own staging files, have bpf take advantage of this and use the tracefs stage 6 macros that the "fast assign" portion of the trace event macro uses. Link: https://lkml.kernel.org/r/20230124202515.873075730@goodmis.org Link: https://lore.kernel.org/lkml/1671181385-5719-1-git-send-email-quic_linyyuan@quicinc.com/ Cc: bpf@vger.kernel.org Cc: Peter Zijlstra Cc: Alexei Starovoitov Cc: Daniel Borkmann Acked-by: Alexei Starovoitov Reported-by: Linyu Yuan Signed-off-by: Steven Rostedt (Google) --- include/trace/bpf_probe.h | 45 +-------------------------------------------- 1 file changed, 1 insertion(+), 44 deletions(-) diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h index 155c495b89ea..1f7fc1fc590c 100644 --- a/include/trace/bpf_probe.h +++ b/include/trace/bpf_probe.h @@ -4,50 +4,7 @@ #ifdef CONFIG_BPF_EVENTS -#undef __entry -#define __entry entry - -#undef __get_dynamic_array -#define __get_dynamic_array(field) \ - ((void *)__entry + (__entry->__data_loc_##field & 0xffff)) - -#undef __get_dynamic_array_len -#define __get_dynamic_array_len(field) \ - ((__entry->__data_loc_##field >> 16) & 0xffff) - -#undef __get_str -#define __get_str(field) ((char *)__get_dynamic_array(field)) - -#undef __get_bitmask -#define __get_bitmask(field) (char *)__get_dynamic_array(field) - -#undef __get_cpumask -#define __get_cpumask(field) (char *)__get_dynamic_array(field) - -#undef __get_sockaddr -#define __get_sockaddr(field) ((struct sockaddr *)__get_dynamic_array(field)) - -#undef __get_rel_dynamic_array -#define __get_rel_dynamic_array(field) \ - ((void *)(&__entry->__rel_loc_##field) + \ - sizeof(__entry->__rel_loc_##field) + \ - (__entry->__rel_loc_##field & 0xffff)) - -#undef __get_rel_dynamic_array_len -#define __get_rel_dynamic_array_len(field) \ - ((__entry->__rel_loc_##field >> 16) & 0xffff) - -#undef __get_rel_str -#define __get_rel_str(field) ((char *)__get_rel_dynamic_array(field)) - -#undef __get_rel_bitmask -#define __get_rel_bitmask(field) (char *)__get_rel_dynamic_array(field) - -#undef __get_rel_cpumask -#define __get_rel_cpumask(field) (char *)__get_rel_dynamic_array(field) - -#undef __get_rel_sockaddr -#define __get_rel_sockaddr(field) ((struct sockaddr *)__get_rel_dynamic_array(field))
+#include "stages/stage6_event_callback.h" #undef __perf_count #define __perf_count(c) (c) -- cgit v1.2.1 From b18c58af29e465d21d4cd9e8c5008ae0e0147384 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Mon, 23 Jan 2023 15:46:49 -0800 Subject: tracing/osnoise: No need for schedule_hrtimeout range No slack time is being passed, just use schedule_hrtimeout(). Link: https://lore.kernel.org/linux-trace-kernel/20230123234649.17968-1-dave@stgolabs.net Signed-off-by: Davidlohr Bueso Acked-by: Daniel Bristot de Oliveira Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_osnoise.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c index 210e1f168392..04f0fdae19a1 100644 --- a/kernel/trace/trace_osnoise.c +++ b/kernel/trace/trace_osnoise.c @@ -1539,7 +1539,7 @@ static void osnoise_sleep(void) wake_time = ktime_add_us(ktime_get(), interval); __set_current_state(TASK_INTERRUPTIBLE); - while (schedule_hrtimeout_range(&wake_time, 0, HRTIMER_MODE_ABS)) { + while (schedule_hrtimeout(&wake_time, HRTIMER_MODE_ABS)) { if (kthread_should_stop()) break; } -- cgit v1.2.1 From a2ff84a5d1e65c7d1178f24ecf39fc55283fbd14 Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Sun, 29 Jan 2023 10:14:02 +0700 Subject: tracing/histogram: Wrap remaining shell snippets in code blocks Most shell command snippets (echo/cat) and their output are already in literal code blocks. However a few still isn't wrapped, in which the htmldocs output is ugly. Wrap the remaining unwrapped snippets, while also fix recent kernel test robot warnings. Link: https://lore.kernel.org/linux-trace-kernel/20230129031402.47420-1-bagasdotme@gmail.com Cc: Masami Hiramatsu Cc: Jonathan Corbet Link: https://lore.kernel.org/linux-doc/202301290253.LU5yIxcJ-lkp@intel.com/ Fixes: 88238513bb2671 ("tracing/histogram: Document variable stacktrace") Reported-by: kernel test robot Signed-off-by: Bagas Sanjaya Signed-off-by: Steven Rostedt (Google) --- Documentation/trace/histogram.rst | 93 ++++++++++++++++++++------------------- 1 file changed, 47 insertions(+), 46 deletions(-) diff --git a/Documentation/trace/histogram.rst b/Documentation/trace/histogram.rst index 5c391328b9bb..8e95295e39b6 100644 --- a/Documentation/trace/histogram.rst +++ b/Documentation/trace/histogram.rst @@ -1864,7 +1864,7 @@ A histogram can now be defined for the new synthetic event:: The above shows the latency "lat" in a power of 2 grouping. Like any other event, once a histogram is enabled for the event, the -output can be displayed by reading the event's 'hist' file. +output can be displayed by reading the event's 'hist' file:: # cat /sys/kernel/debug/tracing/events/synthetic/wakeup_latency/hist @@ -1911,7 +1911,7 @@ output can be displayed by reading the event's 'hist' file. The latency values can also be grouped linearly by a given size with -the ".buckets" modifier and specify a size (in this case groups of 10). +the ".buckets" modifier and specify a size (in this case groups of 10):: # echo 'hist:keys=pid,prio,lat.buckets=10:sort=lat' >> \ /sys/kernel/debug/tracing/events/synthetic/wakeup_latency/trigger @@ -1945,7 +1945,7 @@ the ".buckets" modifier and specify a size (in this case groups of 10). To save stacktraces, create a synthetic event with a field of type "unsigned long[]" or even just "long[]". 
For example, to see how long a task is blocked in an -uninterruptible state: +uninterruptible state:: # cd /sys/kernel/tracing # echo 's:block_lat pid_t pid; u64 delta; unsigned long[] stack;' > dynamic_events @@ -1990,7 +1990,8 @@ uninterruptible state: => kthread+0xe9/0x110 => ret_from_fork+0x2c/0x50 -A synthetic event that has a stacktrace field may use it as a key in histogram: +A synthetic event that has a stacktrace field may use it as a key in +histogram:: # echo 'hist:delta.buckets=100,stack.stacktrace:sort=delta' > events/synthetic/block_lat/trigger # cat events/synthetic/block_lat/hist @@ -2183,11 +2184,11 @@ The following commonly-used handler.action pairs are available: wakeup_new_test($testpid) if comm=="cyclictest"' >> \ /sys/kernel/debug/tracing/events/sched/sched_wakeup_new/trigger - Or, equivalently, using the 'trace' keyword syntax: + Or, equivalently, using the 'trace' keyword syntax:: - # echo 'hist:keys=$testpid:testpid=pid:onmatch(sched.sched_wakeup_new).\ - trace(wakeup_new_test,$testpid) if comm=="cyclictest"' >> \ - /sys/kernel/debug/tracing/events/sched/sched_wakeup_new/trigger + # echo 'hist:keys=$testpid:testpid=pid:onmatch(sched.sched_wakeup_new).\ + trace(wakeup_new_test,$testpid) if comm=="cyclictest"' >> \ + /sys/kernel/debug/tracing/events/sched/sched_wakeup_new/trigger Creating and displaying a histogram based on those events is now just a matter of using the fields and new synthetic event in the @@ -2320,48 +2321,48 @@ The following commonly-used handler.action pairs are available: resulting latency, stored in wakeup_lat, exceeds the current maximum latency, a snapshot is taken. As part of the setup, all the scheduler events are also enabled, which are the events that - will show up in the snapshot when it is taken at some point: + will show up in the snapshot when it is taken at some point:: - # echo 1 > /sys/kernel/debug/tracing/events/sched/enable + # echo 1 > /sys/kernel/debug/tracing/events/sched/enable - # echo 'hist:keys=pid:ts0=common_timestamp.usecs \ - if comm=="cyclictest"' >> \ - /sys/kernel/debug/tracing/events/sched/sched_waking/trigger + # echo 'hist:keys=pid:ts0=common_timestamp.usecs \ + if comm=="cyclictest"' >> \ + /sys/kernel/debug/tracing/events/sched/sched_waking/trigger - # echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0: \ - onmax($wakeup_lat).save(next_prio,next_comm,prev_pid,prev_prio, \ - prev_comm):onmax($wakeup_lat).snapshot() \ - if next_comm=="cyclictest"' >> \ - /sys/kernel/debug/tracing/events/sched/sched_switch/trigger + # echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts0: \ + onmax($wakeup_lat).save(next_prio,next_comm,prev_pid,prev_prio, \ + prev_comm):onmax($wakeup_lat).snapshot() \ + if next_comm=="cyclictest"' >> \ + /sys/kernel/debug/tracing/events/sched/sched_switch/trigger When the histogram is displayed, for each bucket the max value and the saved values corresponding to the max are displayed following the rest of the fields. 
If a snapshot was taken, there is also a message indicating that, - along with the value and event that triggered the global maximum: + along with the value and event that triggered the global maximum:: - # cat /sys/kernel/debug/tracing/events/sched/sched_switch/hist - { next_pid: 2101 } hitcount: 200 - max: 52 next_prio: 120 next_comm: cyclictest \ - prev_pid: 0 prev_prio: 120 prev_comm: swapper/6 + # cat /sys/kernel/debug/tracing/events/sched/sched_switch/hist + { next_pid: 2101 } hitcount: 200 + max: 52 next_prio: 120 next_comm: cyclictest \ + prev_pid: 0 prev_prio: 120 prev_comm: swapper/6 - { next_pid: 2103 } hitcount: 1326 - max: 572 next_prio: 19 next_comm: cyclictest \ - prev_pid: 0 prev_prio: 120 prev_comm: swapper/1 + { next_pid: 2103 } hitcount: 1326 + max: 572 next_prio: 19 next_comm: cyclictest \ + prev_pid: 0 prev_prio: 120 prev_comm: swapper/1 - { next_pid: 2102 } hitcount: 1982 \ - max: 74 next_prio: 19 next_comm: cyclictest \ - prev_pid: 0 prev_prio: 120 prev_comm: swapper/5 + { next_pid: 2102 } hitcount: 1982 \ + max: 74 next_prio: 19 next_comm: cyclictest \ + prev_pid: 0 prev_prio: 120 prev_comm: swapper/5 - Snapshot taken (see tracing/snapshot). Details: - triggering value { onmax($wakeup_lat) }: 572 \ - triggered by event with key: { next_pid: 2103 } + Snapshot taken (see tracing/snapshot). Details: + triggering value { onmax($wakeup_lat) }: 572 \ + triggered by event with key: { next_pid: 2103 } - Totals: - Hits: 3508 - Entries: 3 - Dropped: 0 + Totals: + Hits: 3508 + Entries: 3 + Dropped: 0 In the above case, the event that triggered the global maximum has the key with next_pid == 2103. If you look at the bucket that has @@ -2439,15 +2440,15 @@ The following commonly-used handler.action pairs are available: $cwnd variable. If the value has changed, a snapshot is taken. As part of the setup, all the scheduler and tcp events are also enabled, which are the events that will show up in the snapshot - when it is taken at some point: + when it is taken at some point:: - # echo 1 > /sys/kernel/debug/tracing/events/sched/enable - # echo 1 > /sys/kernel/debug/tracing/events/tcp/enable + # echo 1 > /sys/kernel/debug/tracing/events/sched/enable + # echo 1 > /sys/kernel/debug/tracing/events/tcp/enable - # echo 'hist:keys=dport:cwnd=snd_cwnd: \ - onchange($cwnd).save(snd_wnd,srtt,rcv_wnd): \ - onchange($cwnd).snapshot()' >> \ - /sys/kernel/debug/tracing/events/tcp/tcp_probe/trigger + # echo 'hist:keys=dport:cwnd=snd_cwnd: \ + onchange($cwnd).save(snd_wnd,srtt,rcv_wnd): \ + onchange($cwnd).snapshot()' >> \ + /sys/kernel/debug/tracing/events/tcp/tcp_probe/trigger When the histogram is displayed, for each bucket the tracked value and the saved values corresponding to that value are displayed @@ -2470,10 +2471,10 @@ The following commonly-used handler.action pairs are available: { dport: 443 } hitcount: 211 changed: 10 snd_wnd: 26960 srtt: 17379 rcv_wnd: 28800 - Snapshot taken (see tracing/snapshot). Details:: + Snapshot taken (see tracing/snapshot). 
Details: - triggering value { onchange($cwnd) }: 10 - triggered by event with key: { dport: 80 } + triggering value { onchange($cwnd) }: 10 + triggered by event with key: { dport: 80 } Totals: Hits: 414 -- cgit v1.2.1 From a9c4bdd505630469f93f5efedfc7a9ca254996c8 Mon Sep 17 00:00:00 2001 From: Linyu Yuan Date: Mon, 30 Jan 2023 15:54:09 +0800 Subject: tracing: Acquire buffer from temporary trace sequence There is one dwc3 trace event declared as below: DECLARE_EVENT_CLASS(dwc3_log_event, TP_PROTO(u32 event, struct dwc3 *dwc), TP_ARGS(event, dwc), TP_STRUCT__entry( __field(u32, event) __field(u32, ep0state) __dynamic_array(char, str, DWC3_MSG_MAX) ), TP_fast_assign( __entry->event = event; __entry->ep0state = dwc->ep0state; ), TP_printk("event (%08x): %s", __entry->event, dwc3_decode_event(__get_str(str), DWC3_MSG_MAX, __entry->event, __entry->ep0state)) ); The problem is that when the trace function is called, it will allocate up to DWC3_MSG_MAX bytes from the trace event buffer, but it never fills the buffer during the fast assignment; it only fills the buffer when the output function is called. This means that if the output function is never called, the buffer is never used. Add __get_buf(len), which acquires a buffer from iter->tmp_seq when the trace output function is called, allowing the user to write a string into the acquired buffer. The mentioned dwc3 trace event then changes as below: DECLARE_EVENT_CLASS(dwc3_log_event, TP_PROTO(u32 event, struct dwc3 *dwc), TP_ARGS(event, dwc), TP_STRUCT__entry( __field(u32, event) __field(u32, ep0state) ), TP_fast_assign( __entry->event = event; __entry->ep0state = dwc->ep0state; ), TP_printk("event (%08x): %s", __entry->event, dwc3_decode_event(__get_buf(DWC3_MSG_MAX), DWC3_MSG_MAX, __entry->event, __entry->ep0state)) ); Link: https://lore.kernel.org/linux-trace-kernel/1675065249-23368-1-git-send-email-quic_linyyuan@quicinc.com Cc: Masami Hiramatsu Signed-off-by: Linyu Yuan Signed-off-by: Steven Rostedt (Google) --- include/linux/trace_seq.h | 5 +++++ include/trace/stages/stage3_trace_output.h | 3 +++ include/trace/stages/stage7_class_define.h | 1 + kernel/trace/trace_seq.c | 23 +++++++++++++++++++++++ 4 files changed, 32 insertions(+) diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h index 0c4c7587d6c3..6be92bf559fe 100644 --- a/include/linux/trace_seq.h +++ b/include/linux/trace_seq.h @@ -95,6 +95,7 @@ extern void trace_seq_bitmask(struct trace_seq *s, const unsigned long *maskp, extern int trace_seq_hex_dump(struct trace_seq *s, const char *prefix_str, int prefix_type, int rowsize, int groupsize, const void *buf, size_t len, bool ascii); +char *trace_seq_acquire(struct trace_seq *s, unsigned int len); #else /* CONFIG_TRACING */ static inline __printf(2, 3) @@ -139,6 +140,10 @@ static inline int trace_seq_path(struct trace_seq *s, const struct path *path) { return 0; } +static inline char *trace_seq_acquire(struct trace_seq *s, unsigned int len) +{ + return NULL; +} #endif /* CONFIG_TRACING */ #endif /* _LINUX_TRACE_SEQ_H */ diff --git a/include/trace/stages/stage3_trace_output.h b/include/trace/stages/stage3_trace_output.h index 66374df61ed3..c1fb1355d309 100644 --- a/include/trace/stages/stage3_trace_output.h +++ b/include/trace/stages/stage3_trace_output.h @@ -139,3 +139,6 @@ u64 ____val = (u64)(value); \ (u32) do_div(____val, NSEC_PER_SEC); \ }) + +#undef __get_buf +#define __get_buf(len) trace_seq_acquire(p, (len)) diff --git a/include/trace/stages/stage7_class_define.h index 8795429f388b..bcb960d16fc0 100644 ---
a/include/trace/stages/stage7_class_define.h +++ b/include/trace/stages/stage7_class_define.h @@ -23,6 +23,7 @@ #undef __get_rel_sockaddr #undef __print_array #undef __print_hex_dump +#undef __get_buf /* * The below is not executed in the kernel. It is only what is diff --git a/kernel/trace/trace_seq.c b/kernel/trace/trace_seq.c index 9c90b3a7dce2..e5e299260d0c 100644 --- a/kernel/trace/trace_seq.c +++ b/kernel/trace/trace_seq.c @@ -403,3 +403,26 @@ int trace_seq_hex_dump(struct trace_seq *s, const char *prefix_str, return 1; } EXPORT_SYMBOL(trace_seq_hex_dump); + +/* + * trace_seq_acquire - acquire seq buffer with size len + * @s: trace sequence descriptor + * @len: size of buffer to be acquired + * + * Acquire a buffer of size @len from the trace_seq for output usage; + * the user can fill a string into that buffer. + * + * Returns start address of acquired buffer. + * + * It allows multiple usages in one trace output function call. + */ +char *trace_seq_acquire(struct trace_seq *s, unsigned int len) +{ + char *ret = trace_seq_buffer_ptr(s); + + if (!WARN_ON_ONCE(seq_buf_buffer_left(&s->seq) < len)) + seq_buf_commit(&s->seq, len); + + return ret; +} +EXPORT_SYMBOL(trace_seq_acquire); -- cgit v1.2.1 From 01678fbce3dd2e3042077cf71b68157e733614a0 Mon Sep 17 00:00:00 2001 From: Song Shuai Date: Mon, 30 Jan 2023 16:59:54 +0800 Subject: samples: ftrace: Include the nospec-branch.h only for x86 When other architectures without the nospec functionality build the direct-call functions in samples/ftrace/*.c, the include of asm/nospec-branch.h must be handled with care to avoid a "header file not found" error in the build process. Commit ee3e2469b346 ("x86/ftrace: Make it call depth tracking aware") file-globally includes asm/nospec-branch.h, which provides CALL_DEPTH_ACCOUNT only for the x86 direct-call functions. It seems better to move the include under `#ifdef CONFIG_X86_64`.
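As a rough sketch, the shape of the change in each sample file looks like this (illustrative only; the surrounding include list is an assumption, not a verbatim copy of any of the five files):

	/* portable part of the sample: no x86-only headers up here */
	#include <linux/module.h>
	#include <linux/ftrace.h>

	#ifdef CONFIG_X86_64
	/* CALL_DEPTH_ACCOUNT is only provided (and only needed) on x86-64 */
	#include <asm/nospec-branch.h>

	/* ... the x86-64 inline asm trampolines follow here ... */
	#endif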
Link: https://lore.kernel.org/linux-trace-kernel/20230130085954.647845-1-suagrfillet@gmail.com Signed-off-by: Song Shuai Reviewed-by: Masami Hiramatsu (Google) Signed-off-by: Steven Rostedt (Google) --- samples/ftrace/ftrace-direct-modify.c | 2 +- samples/ftrace/ftrace-direct-multi-modify.c | 2 +- samples/ftrace/ftrace-direct-multi.c | 2 +- samples/ftrace/ftrace-direct-too.c | 2 +- samples/ftrace/ftrace-direct.c | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/samples/ftrace/ftrace-direct-modify.c b/samples/ftrace/ftrace-direct-modify.c index de5a0f67f320..d93abbcb1f4c 100644 --- a/samples/ftrace/ftrace-direct-modify.c +++ b/samples/ftrace/ftrace-direct-modify.c @@ -3,7 +3,6 @@ #include #include #include -#include <asm/nospec-branch.h> extern void my_direct_func1(void); extern void my_direct_func2(void); @@ -26,6 +25,7 @@ static unsigned long my_ip = (unsigned long)schedule; #ifdef CONFIG_X86_64 #include <asm/ibt.h> +#include <asm/nospec-branch.h> asm ( " .pushsection .text, \"ax\", @progbits\n" diff --git a/samples/ftrace/ftrace-direct-multi-modify.c b/samples/ftrace/ftrace-direct-multi-modify.c index a825dbd2c9cf..b58c594efb51 100644 --- a/samples/ftrace/ftrace-direct-multi-modify.c +++ b/samples/ftrace/ftrace-direct-multi-modify.c @@ -3,7 +3,6 @@ #include #include #include -#include <asm/nospec-branch.h> extern void my_direct_func1(unsigned long ip); extern void my_direct_func2(unsigned long ip); @@ -24,6 +23,7 @@ extern void my_tramp2(void *); #ifdef CONFIG_X86_64 #include <asm/ibt.h> +#include <asm/nospec-branch.h> asm ( " .pushsection .text, \"ax\", @progbits\n" diff --git a/samples/ftrace/ftrace-direct-multi.c b/samples/ftrace/ftrace-direct-multi.c index d955a2650605..c27cf130c319 100644 --- a/samples/ftrace/ftrace-direct-multi.c +++ b/samples/ftrace/ftrace-direct-multi.c @@ -5,7 +5,6 @@ #include #include #include -#include <asm/nospec-branch.h> extern void my_direct_func(unsigned long ip); @@ -19,6 +18,7 @@ extern void my_tramp(void *); #ifdef CONFIG_X86_64 #include <asm/ibt.h> +#include <asm/nospec-branch.h> asm ( " .pushsection .text, \"ax\", @progbits\n" diff --git a/samples/ftrace/ftrace-direct-too.c b/samples/ftrace/ftrace-direct-too.c index e13fb59a2b47..8139dce2a31c 100644 --- a/samples/ftrace/ftrace-direct-too.c +++ b/samples/ftrace/ftrace-direct-too.c @@ -4,7 +4,6 @@ #include <linux/mm.h> /* for handle_mm_fault() */ #include #include -#include <asm/nospec-branch.h> extern void my_direct_func(struct vm_area_struct *vma, unsigned long address, unsigned int flags); @@ -21,6 +20,7 @@ extern void my_tramp(void *); #ifdef CONFIG_X86_64 #include <asm/ibt.h> +#include <asm/nospec-branch.h> asm ( " .pushsection .text, \"ax\", @progbits\n" diff --git a/samples/ftrace/ftrace-direct.c b/samples/ftrace/ftrace-direct.c index 1f769d0db20f..1d3d307ca33d 100644 --- a/samples/ftrace/ftrace-direct.c +++ b/samples/ftrace/ftrace-direct.c @@ -4,7 +4,6 @@ #include <linux/sched.h> /* for wake_up_process() */ #include #include -#include <asm/nospec-branch.h> extern void my_direct_func(struct task_struct *p); @@ -18,6 +17,7 @@ extern void my_tramp(void *); #ifdef CONFIG_X86_64 #include <asm/ibt.h> +#include <asm/nospec-branch.h> asm ( " .pushsection .text, \"ax\", @progbits\n" -- cgit v1.2.1 From f94fe7048a352ff8914232a18e2e2f18f8a5ac81 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 30 Jan 2023 14:02:37 +0100 Subject: ftrace: sample: avoid open-coded 64-bit division Calculating the average period requires a 64-bit division that leads to a link failure on 32-bit architectures: x86_64-linux-ld: samples/ftrace/ftrace-ops.o: in function `ftrace_ops_sample_init': ftrace-ops.c:(.init.text+0x23b): undefined reference to `__udivdi3' Use the div_u64() helper to do this instead.
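For illustration, a minimal sketch of the pattern (hypothetical values, not the sample's exact code):

	#include <linux/math64.h>

	u64 period = 123456789ULL;	/* total runtime in ns */
	u32 calls = 100000;		/* number of tracee calls */

	/* "period / calls" is an open-coded 64-bit division; on 32-bit
	 * targets the compiler emits a call to the libgcc helper
	 * __udivdi3, which the kernel does not link against.
	 */
	u64 avg = div_u64(period, calls);	/* links on all architectures */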
Since this is an init function that is not called frequently, the runtime overhead is going to be acceptable. Link: https://lore.kernel.org/linux-trace-kernel/20230130130246.247537-1-arnd@kernel.org Cc: Masami Hiramatsu Fixes: b56c68f705ca ("ftrace: Add sample with custom ops") Signed-off-by: Arnd Bergmann Acked-by: Mark Rutland Signed-off-by: Steven Rostedt (Google) --- samples/ftrace/ftrace-ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/ftrace/ftrace-ops.c b/samples/ftrace/ftrace-ops.c index 24deb51c7261..0c8da87ff5c3 100644 --- a/samples/ftrace/ftrace-ops.c +++ b/samples/ftrace/ftrace-ops.c @@ -223,7 +223,7 @@ static int __init ftrace_ops_sample_init(void) pr_info("Attempted %u calls to %ps in %lluns (%lluns / call)\n", nr_function_calls, tracee_relevant, - period, period / nr_function_calls); + period, div_u64(period, nr_function_calls)); if (persist) return 0; -- cgit v1.2.1 From aef70ebd624cf2959c531b652365da83f2b19352 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Mon, 30 Jan 2023 11:37:08 -0800 Subject: samples: ftrace: Make some global variables static smatch reports this representative issue: samples/ftrace/ftrace-ops.c:15:14: warning: symbol 'nr_function_calls' was not declared. Should it be static? The nr_function_calls and several other global variables are only used in ftrace-ops.c, so they should be static. Remove the instances of initializing static int to 0. Link: https://lore.kernel.org/linux-trace-kernel/20230130193708.1378108-1-trix@redhat.com Signed-off-by: Tom Rix Acked-by: Mark Rutland Signed-off-by: Steven Rostedt (Google) --- samples/ftrace/ftrace-ops.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/samples/ftrace/ftrace-ops.c b/samples/ftrace/ftrace-ops.c index 0c8da87ff5c3..68d6685c80bd 100644 --- a/samples/ftrace/ftrace-ops.c +++ b/samples/ftrace/ftrace-ops.c @@ -12,7 +12,7 @@ * Arbitrary large value chosen to be sufficiently large to minimize noise but * sufficiently small to complete quickly. */ -unsigned int nr_function_calls = 100000; +static unsigned int nr_function_calls = 100000; module_param(nr_function_calls, uint, 0); MODULE_PARM_DESC(nr_function_calls, "How many times to call the relevant tracee"); @@ -21,7 +21,7 @@ MODULE_PARM_DESC(nr_function_calls, "How many times to call the relevant tracee" * be called directly or whether it's necessary to go via the list func, which * can be significantly more expensive. */ -unsigned int nr_ops_relevant = 1; +static unsigned int nr_ops_relevant = 1; module_param(nr_ops_relevant, uint, 0); MODULE_PARM_DESC(nr_ops_relevant, "How many ftrace_ops to associate with the relevant tracee"); @@ -30,7 +30,7 @@ MODULE_PARM_DESC(nr_ops_relevant, "How many ftrace_ops to associate with the rel * tracers enabled for distinct functions can force the use of the list func * and incur overhead for all call sites. */ -unsigned int nr_ops_irrelevant = 0; +static unsigned int nr_ops_irrelevant; module_param(nr_ops_irrelevant, uint, 0); MODULE_PARM_DESC(nr_ops_irrelevant, "How many ftrace_ops to associate with the irrelevant tracee"); @@ -38,15 +38,15 @@ MODULE_PARM_DESC(nr_ops_irrelevant, "How many ftrace_ops to associate with the i * On architectures with DYNAMIC_FTRACE_WITH_REGS, saving the full pt_regs can * be more expensive than only saving the minimal necessary regs.
*/ -bool save_regs = false; +static bool save_regs; module_param(save_regs, bool, 0); MODULE_PARM_DESC(save_regs, "Register ops with FTRACE_OPS_FL_SAVE_REGS (save all registers in the trampoline)"); -bool assist_recursion = false; +static bool assist_recursion; module_param(assist_recursion, bool, 0); MODULE_PARM_DESC(assist_reursion, "Register ops with FTRACE_OPS_FL_RECURSION"); -bool assist_rcu = false; +static bool assist_rcu; module_param(assist_rcu, bool, 0); MODULE_PARM_DESC(assist_reursion, "Register ops with FTRACE_OPS_FL_RCU"); @@ -55,7 +55,7 @@ MODULE_PARM_DESC(assist_reursion, "Register ops with FTRACE_OPS_FL_RCU"); * overhead. Sometimes a consistency check using a more expensive tracer is * desireable. */ -bool check_count = false; +static bool check_count; module_param(check_count, bool, 0); MODULE_PARM_DESC(check_count, "Check that tracers are called the expected number of times\n"); @@ -64,7 +64,7 @@ MODULE_PARM_DESC(check_count, "Check that tracers are called the expected number * runs, but sometimes it can be useful to leave them registered so that they * can be inspected through the tracefs 'enabled_functions' file. */ -bool persist = false; +static bool persist; module_param(persist, bool, 0); MODULE_PARM_DESC(persist, "Successfully load module and leave ftrace ops registered after test completes\n"); @@ -114,8 +114,8 @@ static void ops_func_count(unsigned long ip, unsigned long parent_ip, self->count++; } -struct sample_ops *ops_relevant; -struct sample_ops *ops_irrelevant; +static struct sample_ops *ops_relevant; +static struct sample_ops *ops_irrelevant; static struct sample_ops *ops_alloc_init(void *tracee, ftrace_func_t func, unsigned long flags, int nr) @@ -163,8 +163,8 @@ static void ops_check(struct sample_ops *ops, int nr, } } -ftrace_func_t tracer_relevant = ops_func_nop; -ftrace_func_t tracer_irrelevant = ops_func_nop; +static ftrace_func_t tracer_relevant = ops_func_nop; +static ftrace_func_t tracer_irrelevant = ops_func_nop; static int __init ftrace_ops_sample_init(void) { -- cgit v1.2.1 From 9971c3f944489ff7aacb9d25e0cde841a5f6018a Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 31 Jan 2023 09:52:37 -0500 Subject: tracing: Fix trace_event_raw_event_synth() if else statement The test to check if the field is a stack should only be done if the field is not a string. But the code had: } if (event->fields[i]->is_stack) { and not } else if (event->fields[i]->is_stack) { which would cause it to always be tested. Worse yet, this also included an "else" statement that was only to be called if the field was neither a string nor a stack, but this code allowed it to be called if it was a string (and not a stack). Also fixed some whitespace issues.
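Roughly, the control-flow difference looks like this (a simplified sketch, not the function's actual code):

	if (is_string) {
		/* string handling */
	} if (is_stack) {		/* bug: a second, independent if */
		/* stack handling */
	} else {
		/* meant for fields that are neither strings nor stacks,
		 * but this branch also runs for a string (non-stack) field
		 */
	}

	if (is_string) {
		/* string handling */
	} else if (is_stack) {		/* fix: part of one if/else chain */
		/* stack handling */
	} else {
		/* now only reached when the field is neither */
	}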
Link: https://lore.kernel.org/all/202301302110.mEtNwkBD-lkp@intel.com/ Link: https://lore.kernel.org/linux-trace-kernel/20230131095237.63e3ca8d@gandalf.local.home Cc: Tom Zanussi Fixes: 00cf3d672a9d ("tracing: Allow synthetic events to pass around stacktraces") Reported-by: kernel test robot Signed-off-by: Steven Rostedt (Google) Acked-by: Masami Hiramatsu (Google) --- kernel/trace/trace_events_synth.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index adb630633f31..306c89e0ce55 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -564,8 +564,8 @@ static notrace void trace_event_raw_event_synth(void *__data, event->fields[i]->is_dynamic, data_size, &n_u64); data_size += len; /* only dynamic string increments */ - } if (event->fields[i]->is_stack) { - long *stack = (long *)(long)var_ref_vals[val_idx]; + } else if (event->fields[i]->is_stack) { + long *stack = (long *)(long)var_ref_vals[val_idx]; len = trace_stack(entry, event, stack, data_size, &n_u64); -- cgit v1.2.1 From cb1f98c5e5742273bca39410d504430793ab701a Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 7 Feb 2023 12:28:50 -0500 Subject: tracing: Add creation of instances at boot command line Add a kernel command line parameter to add tracing instances. This only creates the instances at boot but does not yet enable any events in them. Later changes will extend this command line to add enabling of events, filters, and triggers. As well as possibly redirecting trace_printk()! Link: https://lkml.kernel.org/r/20230207173026.186210158@goodmis.org Cc: Randy Dunlap Cc: Masami Hiramatsu Cc: Andrew Morton Reviewed-by: Ross Zwisler Signed-off-by: Steven Rostedt (Google) --- Documentation/admin-guide/kernel-parameters.txt | 6 +++ kernel/trace/trace.c | 50 +++++++++++++++++++++++++ 2 files changed, 56 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 6cfa6e3996cf..9545da5ed849 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -6272,6 +6272,12 @@ comma-separated list of trace events to enable. See also Documentation/trace/events.rst + trace_instance=[instance-info] + [FTRACE] Create a ring buffer instance early in boot up. + This will be listed in: + + /sys/kernel/tracing/instances + trace_options=[option-list] [FTRACE] Enable or disable tracer options at boot.
The option-list is a comma delimited list of options diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index b90eecd27dfc..863716fcca49 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -49,6 +49,8 @@ #include #include +#include <asm/setup.h> /* COMMAND_LINE_SIZE */ + #include "trace.h" #include "trace_output.h" @@ -186,6 +188,9 @@ static char *default_bootup_tracer; static bool allocate_snapshot; static bool snapshot_at_boot; +static char boot_instance_info[COMMAND_LINE_SIZE] __initdata; +static int boot_instance_index; + static int __init set_cmdline_ftrace(char *str) { strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE); @@ -239,6 +244,23 @@ static int __init boot_snapshot(char *str) __setup("ftrace_boot_snapshot", boot_snapshot); +static int __init boot_instance(char *str) +{ + char *slot = boot_instance_info + boot_instance_index; + int left = sizeof(boot_instance_info) - boot_instance_index; + int ret; + + if (strlen(str) >= left) + return -1; + + ret = snprintf(slot, left, "%s\t", str); + boot_instance_index += ret; + + return 1; +} +__setup("trace_instance=", boot_instance); + + static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata; static int __init set_trace_boot_options(char *str) @@ -10144,6 +10166,31 @@ out: return ret; } +__init static void enable_instances(void) +{ + struct trace_array *tr; + char *curr_str; + char *str; + char *tok; + + /* A tab is always appended */ + boot_instance_info[boot_instance_index - 1] = '\0'; + str = boot_instance_info; + + while ((curr_str = strsep(&str, "\t"))) { + + tok = strsep(&curr_str, ","); + + tr = trace_array_get_by_name(tok); + if (!tr) { + pr_warn("Failed to create instance buffer %s\n", curr_str); + continue; + } + /* Allow user space to delete it */ + trace_array_put(tr); + } +} + __init static int tracer_alloc_buffers(void) { int ring_buf_size; @@ -10302,6 +10349,9 @@ void __init early_trace_init(void) void __init trace_init(void) { trace_event_init(); + + if (boot_instance_index) + enable_instances(); } __init static void clear_boot_tracer(void) -- cgit v1.2.1 From c4846480831ea526bcab0ca0bf6364bfa6ed4227 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 7 Feb 2023 12:28:51 -0500 Subject: tracing: Add enabling of events to boot instances Add the format of: trace_instance=foo,sched:sched_switch,irq_handler_entry,initcall That will create the "foo" instance and enable the sched_switch event (where the "sched" system is explicitly specified), the irq_handler_entry event, and all events under the system initcall. Link: https://lkml.kernel.org/r/20230207173026.386114535@goodmis.org Cc: Masami Hiramatsu Cc: Andrew Morton Reviewed-by: Ross Zwisler Signed-off-by: Steven Rostedt (Google) --- Documentation/admin-guide/kernel-parameters.txt | 14 ++++++++++++++ kernel/trace/trace.c | 4 ++++ kernel/trace/trace.h | 2 ++ kernel/trace/trace_events.c | 9 ++++----- 4 files changed, 24 insertions(+), 5 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 9545da5ed849..40304dc3d319 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -6278,6 +6278,20 @@ /sys/kernel/tracing/instances + Events can be enabled at the time the instance is created + via: + + trace_instance=<name>,<system1>:<event1>,<system2>:<event2> + + Note, the "<system>:" portion is optional if the event is + unique.
+ + trace_instance=foo,sched:sched_switch,irq_handler_entry,initcall + + will enable the "sched_switch" event (note, the "sched:" is optional, and + the same thing would happen if it was left off). The irq_handler_entry + event, and all events under the "initcall" system. + trace_options=[option-list] [FTRACE] Enable or disable tracer options at boot. The option-list is a comma delimited list of options diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 863716fcca49..5b16af9e7d30 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -10188,6 +10188,10 @@ __init static void enable_instances(void) } /* Allow user space to delete it */ trace_array_put(tr); + + while ((tok = strsep(&curr_str, ","))) { + early_enable_events(tr, tok, true); + } } } diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index d16929dd0f08..f10bf804dd2b 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1334,6 +1334,8 @@ DECLARE_PER_CPU(int, trace_buffered_event_cnt); void trace_buffered_event_disable(void); void trace_buffered_event_enable(void); +void early_enable_events(struct trace_array *tr, char *buf, bool disable_first); + static inline void __trace_event_discard_commit(struct trace_buffer *buffer, struct ring_buffer_event *event) diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index b07931ad97de..cf3fd74fa675 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -3750,10 +3750,9 @@ static __init int event_trace_memsetup(void) return 0; } -static __init void -early_enable_events(struct trace_array *tr, bool disable_first) +__init void +early_enable_events(struct trace_array *tr, char *buf, bool disable_first) { - char *buf = bootup_event_buf; char *token; int ret; @@ -3806,7 +3805,7 @@ static __init int event_trace_enable(void) */ __trace_early_add_events(tr); - early_enable_events(tr, false); + early_enable_events(tr, bootup_event_buf, false); trace_printk_start_comm(); @@ -3834,7 +3833,7 @@ static __init int event_trace_enable_again(void) if (!tr) return -ENODEV; - early_enable_events(tr, true); + early_enable_events(tr, bootup_event_buf, true); return 0; } -- cgit v1.2.1 From d503b8f7474fe7ac616518f7fc49773cbab49f36 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 7 Feb 2023 12:28:52 -0500 Subject: tracing: Add trace_array_puts() to write into instance Add a generic trace_array_puts() that can be used to "trace_puts()" into an allocated trace_array instance. This is just another variant of trace_array_printk(). Link: https://lkml.kernel.org/r/20230207173026.584717290@goodmis.org Cc: Masami Hiramatsu Cc: Andrew Morton Reviewed-by: Ross Zwisler Signed-off-by: Steven Rostedt (Google) --- include/linux/trace.h | 12 ++++++++++++ kernel/trace/trace.c | 27 +++++++++++++++++---------- 2 files changed, 29 insertions(+), 10 deletions(-) diff --git a/include/linux/trace.h b/include/linux/trace.h index 80ffda871749..2a70a447184c 100644 --- a/include/linux/trace.h +++ b/include/linux/trace.h @@ -33,6 +33,18 @@ struct trace_array; int register_ftrace_export(struct trace_export *export); int unregister_ftrace_export(struct trace_export *export); +/** + * trace_array_puts - write a constant string into the trace buffer. + * @tr: The trace array to write to + * @str: The constant string to write + */ +#define trace_array_puts(tr, str) \ + ({ \ + str ? 
__trace_array_puts(tr, _THIS_IP_, str, strlen(str)) : -1; \ + }) +int __trace_array_puts(struct trace_array *tr, unsigned long ip, + const char *str, int size); + void trace_printk_init_buffers(void); __printf(3, 4) int trace_array_printk(struct trace_array *tr, unsigned long ip, diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 5b16af9e7d30..9918bf7441ed 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1023,13 +1023,8 @@ __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *ev ring_buffer_unlock_commit(buffer); } -/** - * __trace_puts - write a constant string into the trace buffer. - * @ip: The address of the caller - * @str: The constant string to write - * @size: The size of the string. - */ -int __trace_puts(unsigned long ip, const char *str, int size) +int __trace_array_puts(struct trace_array *tr, unsigned long ip, + const char *str, int size) { struct ring_buffer_event *event; struct trace_buffer *buffer; @@ -1037,7 +1032,7 @@ int __trace_puts(unsigned long ip, const char *str, int size) unsigned int trace_ctx; int alloc; - if (!(global_trace.trace_flags & TRACE_ITER_PRINTK)) + if (!(tr->trace_flags & TRACE_ITER_PRINTK)) return 0; if (unlikely(tracing_selftest_running || tracing_disabled)) @@ -1046,7 +1041,7 @@ int __trace_puts(unsigned long ip, const char *str, int size) alloc = sizeof(*entry) + size + 2; /* possible \n added */ trace_ctx = tracing_gen_ctx(); - buffer = global_trace.array_buffer.buffer; + buffer = tr->array_buffer.buffer; ring_buffer_nest_start(buffer); event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc, trace_ctx); @@ -1068,11 +1063,23 @@ int __trace_puts(unsigned long ip, const char *str, int size) entry->buf[size] = '\0'; __buffer_unlock_commit(buffer, event); - ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL); + ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL); out: ring_buffer_nest_end(buffer); return size; } +EXPORT_SYMBOL_GPL(__trace_array_puts); + +/** + * __trace_puts - write a constant string into the trace buffer. + * @ip: The address of the caller + * @str: The constant string to write + * @size: The size of the string. + */ +int __trace_puts(unsigned long ip, const char *str, int size) +{ + return __trace_array_puts(&global_trace, ip, str, size); +} EXPORT_SYMBOL_GPL(__trace_puts); /** -- cgit v1.2.1 From 9c1c251d670bc107b9ce600a2c20adb08b4849e9 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Tue, 7 Feb 2023 12:28:53 -0500 Subject: tracing: Allow boot instances to have snapshot buffers Add an "=<instance>" suffix to ftrace_boot_snapshot, where the named instance will get a snapshot buffer, and a snapshot will be taken at the end of boot (which will save the boot traces). Link: https://lkml.kernel.org/r/20230207173026.792774721@goodmis.org Cc: Masami Hiramatsu Cc: Andrew Morton Reviewed-by: Ross Zwisler Signed-off-by: Steven Rostedt (Google) --- Documentation/admin-guide/kernel-parameters.txt | 9 +++ kernel/trace/trace.c | 79 ++++++++++++++++++++++--- 2 files changed, 81 insertions(+), 7 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 40304dc3d319..7fa1ee4ba910 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1532,6 +1532,15 @@ boot up that is likely to be overridden by user space start up functionality.
+ Optionally, the snapshot can also be defined for a tracing + instance that was created by the trace_instance= command + line parameter. + + trace_instance=foo,sched_switch ftrace_boot_snapshot=foo + + The above will cause the "foo" tracing instance to trigger + a snapshot at the end of boot up. + ftrace_dump_on_oops[=orig_cpu] [FTRACE] will dump the trace buffers on oops. If no parameter is passed, ftrace will dump diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9918bf7441ed..97c88711f270 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -191,6 +191,9 @@ static bool snapshot_at_boot; static char boot_instance_info[COMMAND_LINE_SIZE] __initdata; static int boot_instance_index; +static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata; +static int boot_snapshot_index; + static int __init set_cmdline_ftrace(char *str) { strlcpy(bootup_tracer_buf, str, MAX_TRACER_SIZE); @@ -227,9 +230,22 @@ __setup("traceoff_on_warning", stop_trace_on_warning); static int __init boot_alloc_snapshot(char *str) { - allocate_snapshot = true; - /* We also need the main ring buffer expanded */ - ring_buffer_expanded = true; + char *slot = boot_snapshot_info + boot_snapshot_index; + int left = sizeof(boot_snapshot_info) - boot_snapshot_index; + int ret; + + if (str[0] == '=') { + str++; + if (strlen(str) >= left) + return -1; + + ret = snprintf(slot, left, "%s\t", str); + boot_snapshot_index += ret; + } else { + allocate_snapshot = true; + /* We also need the main ring buffer expanded */ + ring_buffer_expanded = true; + } return 1; } __setup("alloc_snapshot", boot_alloc_snapshot); @@ -9254,10 +9270,6 @@ static int allocate_trace_buffers(struct trace_array *tr, int size) } tr->allocated_snapshot = allocate_snapshot; - /* - * Only the top level trace array gets its snapshot allocated - * from the kernel command line. - */ allocate_snapshot = false; #endif @@ -10173,6 +10185,47 @@ out: return ret; } +#ifdef CONFIG_TRACER_MAX_TRACE +__init static bool tr_needs_alloc_snapshot(const char *name) +{ + char *test; + int len = strlen(name); + bool ret; + + if (!boot_snapshot_index) + return false; + + if (strncmp(name, boot_snapshot_info, len) == 0 && + boot_snapshot_info[len] == '\t') + return true; + + test = kmalloc(strlen(name) + 3, GFP_KERNEL); + if (!test) + return false; + + sprintf(test, "\t%s\t", name); + ret = strstr(boot_snapshot_info, test) == NULL; + kfree(test); + return ret; +} + +__init static void do_allocate_snapshot(const char *name) +{ + if (!tr_needs_alloc_snapshot(name)) + return; + + /* + * When allocate_snapshot is set, the next call to + * allocate_trace_buffers() (called by trace_array_get_by_name()) + * will allocate the snapshot buffer. That will also clear + * this flag.
+ */ + allocate_snapshot = true; +} +#else +static inline void do_allocate_snapshot(const char *name) { } +#endif + __init static void enable_instances(void) { struct trace_array *tr; @@ -10188,6 +10241,9 @@ __init static void enable_instances(void) tok = strsep(&curr_str, ","); + if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE)) + do_allocate_snapshot(tok); + tr = trace_array_get_by_name(tok); if (!tr) { pr_warn("Failed to create instance buffer %s\n", curr_str); @@ -10335,10 +10391,19 @@ out: void __init ftrace_boot_snapshot(void) { + struct trace_array *tr; + if (snapshot_at_boot) { tracing_snapshot(); internal_trace_puts("** Boot snapshot taken **\n"); } + + list_for_each_entry(tr, &ftrace_trace_arrays, list) { + if (tr == &global_trace) + continue; + trace_array_puts(tr, "** Boot snapshot taken **\n"); + tracing_snapshot_instance(tr); + } } void __init early_trace_init(void) -- cgit v1.2.1 From fc1a9dc101292403babe0c5c2f99f4748580ed98 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Fri, 10 Feb 2023 15:33:03 -0600 Subject: tracing/histogram: Don't use strlen to find length of stacktrace variables Because stacktraces are saved in dynamic strings, trace_event_raw_event_synth() uses strlen to determine the length of the stack. Stacktraces may contain 0-bytes, though, in the saved addresses, so the length found and passed to reserve() will be too small. Fix this by using the first unsigned long in the stack variables to store the actual number of elements in the stack and have trace_event_raw_event_synth() use that to determine the length of the stack. Link: https://lkml.kernel.org/r/1ed6906cd9d6477ef2bd8e63c61de20a9ffe64d7.1676063532.git.zanussi@kernel.org Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_hist.c | 12 ++++++++---- kernel/trace/trace_events_synth.c | 7 ++++++- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 7f3e6ca6f0fa..f21e42ddba69 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -3137,13 +3137,15 @@ static inline void __update_field_vars(struct tracing_map_elt *elt, size = min(val->size, STR_VAR_LEN_MAX); strscpy(str, val_str, size); } else { + char *stack_start = str + sizeof(unsigned long); int e; - e = stack_trace_save((void *)str, + e = stack_trace_save((void *)stack_start, HIST_STACKTRACE_DEPTH, HIST_STACKTRACE_SKIP); if (e < HIST_STACKTRACE_DEPTH - 1) - ((unsigned long *)str)[e] = 0; + ((unsigned long *)stack_start)[e] = 0; + *((unsigned long *)str) = e; } var_val = (u64)(uintptr_t)str; } @@ -5135,13 +5137,15 @@ static void hist_trigger_elt_update(struct hist_trigger_data *hist_data, size = min(hist_field->size, STR_VAR_LEN_MAX); strscpy(str, val_str, size); } else { + char *stack_start = str + sizeof(unsigned long); int e; - e = stack_trace_save((void *)str, + e = stack_trace_save((void *)stack_start, HIST_STACKTRACE_DEPTH, HIST_STACKTRACE_SKIP); if (e < HIST_STACKTRACE_DEPTH - 1) - ((unsigned long *)str)[e] = 0; + ((unsigned long *)stack_start)[e] = 0; + *((unsigned long *)str) = e; } hist_val = (u64)(uintptr_t)str; } diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c index 306c89e0ce55..70bddb25d9c0 100644 --- a/kernel/trace/trace_events_synth.c +++ b/kernel/trace/trace_events_synth.c @@ -538,7 +538,12 @@ static notrace void trace_event_raw_event_synth(void *__data, val_idx = var_ref_idx[field_pos]; str_val = (char *)(long)var_ref_vals[val_idx]; - len = 
kern_fetch_store_strlen((unsigned long)str_val); + if (event->dynamic_fields[i]->is_stack) { + len = *((unsigned long *)str_val); + len *= sizeof(unsigned long); + } else { + len = kern_fetch_store_strlen((unsigned long)str_val); + } fields_size += len; } -- cgit v1.2.1 From 8261ef2eb35fce689a82d346b25e945e16bcb9d3 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Google)" Date: Wed, 15 Feb 2023 20:25:38 -0500 Subject: tracing: Add BUILD_BUG() to make sure stacktrace fits in strings The max string length for a histogram variable is 256 bytes. The max depth of a stacktrace is 16. With 8byte words, that's 16 * 8 = 128. Which can easily fit in the string variable. The histogram stacktrace is being stored in the string value (with the given max length), with the assumption it will fit. To make sure that this is always the case (in the case that the stack trace depth increases), add a BUILD_BUG_ON() to test this. Link: https://lore.kernel.org/linux-trace-kernel/20230214002418.0103b9e765d3e5c374d2aa7d@kernel.org/ Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_hist.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index f21e42ddba69..6cef1def1da5 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -3119,6 +3119,9 @@ static inline void __update_field_vars(struct tracing_map_elt *elt, unsigned int i, j, var_idx; u64 var_val; + /* Make sure stacktrace can fit in the string variable length */ + BUILD_BUG_ON((HIST_STACKTRACE_DEPTH + 1) * sizeof(long) >= STR_VAR_LEN_MAX); + for (i = 0, j = field_var_str_start; i < n_field_vars; i++) { struct field_var *field_var = field_vars[i]; struct hist_field *var = field_var->var; -- cgit v1.2.1 From 2bacfd9f7e5ac18ae40ecd7b29c63580d0e5b329 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Fri, 10 Feb 2023 15:33:04 -0600 Subject: tracing/histogram: Fix a few problems with stacktrace variable printing Currently, there are a few problems when printing hist triggers and trace output when using stacktrace variables. 
This fixes the problems seen below: # echo 'hist:keys=delta.buckets=100,stack.stacktrace:sort=delta' > /sys/kernel/debug/tracing/events/synthetic/block_lat/trigger # cat /sys/kernel/debug/tracing/events/synthetic/block_lat/trigger hist:keys=delta.buckets=100,stacktrace:vals=hitcount:sort=delta.buckets=100:size=2048 [active] # echo 'hist:keys=next_pid:ts=common_timestamp.usecs,st=stacktrace if prev_state == 2' >> /sys/kernel/debug/tracing/events/sched/sched_switch/trigger # cat /sys/kernel/debug/tracing/events/sched/sched_switch/trigger hist:keys=next_pid:vals=hitcount:ts=common_timestamp.usecs,st=stacktrace.stacktrace:sort=hitcount:size=2048:clock=global if prev_state == 2 [active] and also in the trace output (should be stack.stacktrace): { delta: ~ 100-199, stacktrace __schedule+0xa19/0x1520 Link: https://lkml.kernel.org/r/60bebd4e546728e012a7a2bcbf58716d48ba6edb.1676063532.git.zanussi@kernel.org Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_hist.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 6cef1def1da5..c4f1fe985f6f 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -1356,9 +1356,12 @@ static const char *hist_field_name(struct hist_field *field, field_name = field->name; } else if (field->flags & HIST_FIELD_FL_TIMESTAMP) field_name = "common_timestamp"; - else if (field->flags & HIST_FIELD_FL_STACKTRACE) - field_name = "stacktrace"; - else if (field->flags & HIST_FIELD_FL_HITCOUNT) + else if (field->flags & HIST_FIELD_FL_STACKTRACE) { + if (field->field) + field_name = field->field->name; + else + field_name = "stacktrace"; + } else if (field->flags & HIST_FIELD_FL_HITCOUNT) field_name = "hitcount"; if (field_name == NULL) @@ -5339,7 +5342,10 @@ static void hist_trigger_print_key(struct seq_file *m, seq_printf(m, "%s: %-30s[%3llu]", field_name, syscall_name, uval); } else if (key_field->flags & HIST_FIELD_FL_STACKTRACE) { - seq_puts(m, "stacktrace:\n"); + if (key_field->field) + seq_printf(m, "%s.stacktrace", key_field->field->name); + else + seq_puts(m, "stacktrace:\n"); hist_trigger_stacktrace_print(m, key + key_field->offset, HIST_STACKTRACE_DEPTH); @@ -5884,7 +5890,8 @@ static void hist_field_print(struct seq_file *m, struct hist_field *hist_field) if (hist_field->flags) { if (!(hist_field->flags & HIST_FIELD_FL_VAR_REF) && - !(hist_field->flags & HIST_FIELD_FL_EXPR)) { + !(hist_field->flags & HIST_FIELD_FL_EXPR) && + !(hist_field->flags & HIST_FIELD_FL_STACKTRACE)) { const char *flags = get_hist_field_flags(hist_field); if (flags) @@ -5917,9 +5924,12 @@ static int event_hist_trigger_print(struct seq_file *m, if (i > hist_data->n_vals) seq_puts(m, ","); - if (field->flags & HIST_FIELD_FL_STACKTRACE) - seq_puts(m, "stacktrace"); - else + if (field->flags & HIST_FIELD_FL_STACKTRACE) { + if (field->field) + seq_printf(m, "%s.stacktrace", field->field->name); + else + seq_puts(m, "stacktrace"); + } else hist_field_print(m, field); } -- cgit v1.2.1 From f5914b301a17575a4cbcb85a0169a3148b958064 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Fri, 10 Feb 2023 15:33:05 -0600 Subject: tracing/histogram: Fix stacktrace key The current code will always use the current stacktrace as a key even if a stacktrace contained in a specific event field was specified. 
For example, we expect to use the 'unsigned long[] stack' field in the below event in the histogram: # echo 's:block_lat pid_t pid; u64 delta; unsigned long[] stack;' > /sys/kernel/debug/tracing/dynamic_events # echo 'hist:keys=delta.buckets=100,stack.stacktrace:sort=delta' > /sys/kernel/debug/tracing/events/synthetic/block_lat/trigger But in fact, when we type out the trigger, we see that it's using the plain old global 'stacktrace' as the key, which is just the stacktrace when the event was hit and not the stacktrace contained in the event, which is what we want: # cat /sys/kernel/debug/tracing/events/synthetic/block_lat/trigger hist:keys=delta.buckets=100,stacktrace:vals=hitcount:sort=delta.buckets=100:size=2048 [active] And in fact, there's no code to actually retrieve it from the event, so we need to add HIST_FIELD_FN_STACK and hist_field_stack() to get it and hook it into the trigger code. For now, since the stack is just using dynamic strings, this could just use the dynamic string function, but it seems cleaner to have a dedicated function and be able to tweak it independently as necessary. Link: https://lkml.kernel.org/r/11aa614c82976adbfa4ea763dbe885b5fb01d59c.1676063532.git.zanussi@kernel.org Signed-off-by: Tom Zanussi [ Fixed 32bit build warning reported by kernel test robot ] Signed-off-by: Steven Rostedt (Google) --- kernel/trace/trace_events_hist.c | 34 +++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index c4f1fe985f6f..89877a18f933 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -135,6 +135,7 @@ enum hist_field_fn { HIST_FIELD_FN_DIV_NOT_POWER2, HIST_FIELD_FN_DIV_MULT_SHIFT, HIST_FIELD_FN_EXECNAME, + HIST_FIELD_FN_STACK, }; /* @@ -1982,7 +1983,10 @@ static struct hist_field *create_hist_field(struct hist_trigger_data *hist_data, } if (flags & HIST_FIELD_FL_STACKTRACE) { - hist_field->fn_num = HIST_FIELD_FN_NOP; + if (field) + hist_field->fn_num = HIST_FIELD_FN_STACK; + else + hist_field->fn_num = HIST_FIELD_FN_NOP; hist_field->size = HIST_STACKTRACE_SIZE; hist_field->type = kstrdup_const("unsigned long[]", GFP_KERNEL); if (!hist_field->type) @@ -4274,6 +4278,19 @@ static u64 hist_field_execname(struct hist_field *hist_field, return (u64)(unsigned long)(elt_data->comm); } +static u64 hist_field_stack(struct hist_field *hist_field, + struct tracing_map_elt *elt, + struct trace_buffer *buffer, + struct ring_buffer_event *rbe, + void *event) +{ + u32 str_item = *(u32 *)(event + hist_field->field->offset); + int str_loc = str_item & 0xffff; + char *addr = (char *)(event + str_loc); + + return (u64)(unsigned long)addr; +} + static u64 hist_fn_call(struct hist_field *hist_field, struct tracing_map_elt *elt, struct trace_buffer *buffer, @@ -4337,6 +4354,8 @@ static u64 hist_fn_call(struct hist_field *hist_field, return div_by_mult_and_shift(hist_field, elt, buffer, rbe, event); case HIST_FIELD_FN_EXECNAME: return hist_field_execname(hist_field, elt, buffer, rbe, event); + case HIST_FIELD_FN_STACK: + return hist_field_stack(hist_field, elt, buffer, rbe, event); default: return 0; } @@ -5238,8 +5257,17 @@ static void event_hist_trigger(struct event_trigger_data *data, if (key_field->flags & HIST_FIELD_FL_STACKTRACE) { memset(entries, 0, HIST_STACKTRACE_SIZE); - stack_trace_save(entries, HIST_STACKTRACE_DEPTH, - HIST_STACKTRACE_SKIP); + if (key_field->field) { + unsigned long *stack, n_entries; + + field_contents =
hist_fn_call(key_field, elt, buffer, rbe, rec); + stack = (unsigned long *)(long)field_contents; + n_entries = *stack; + memcpy(entries, ++stack, n_entries * sizeof(unsigned long)); + } else { + stack_trace_save(entries, HIST_STACKTRACE_DEPTH, + HIST_STACKTRACE_SKIP); + } key = entries; } else { field_contents = hist_fn_call(key_field, elt, buffer, rbe, rec); -- cgit v1.2.1 From d8f0ae3ebed416728077fe94698983c30d409241 Mon Sep 17 00:00:00 2001 From: Tom Zanussi Date: Fri, 10 Feb 2023 15:33:06 -0600 Subject: tracing/histogram: Fix stacktrace histogram Documentation Fix a small problem with the histogram specification in the Documentation, and change the example to show output using a stacktrace field rather than the global stacktrace. Link: https://lkml.kernel.org/r/f75f807dd4998249e513515f703a2ff7407605f4.1676063532.git.zanussi@kernel.org Signed-off-by: Tom Zanussi Signed-off-by: Steven Rostedt (Google) --- Documentation/trace/histogram.rst | 156 ++++++++++++++++++++++---------------- 1 file changed, 90 insertions(+), 66 deletions(-) diff --git a/Documentation/trace/histogram.rst b/Documentation/trace/histogram.rst index 8e95295e39b6..c5cfc6e7baea 100644 --- a/Documentation/trace/histogram.rst +++ b/Documentation/trace/histogram.rst @@ -1993,82 +1993,106 @@ uninterruptible state:: A synthetic event that has a stacktrace field may use it as a key in histogram:: - # echo 'hist:delta.buckets=100,stack.stacktrace:sort=delta' > events/synthetic/block_lat/trigger + # echo 'hist:keys=delta.buckets=100,stack.stacktrace:sort=delta' > events/synthetic/block_lat/trigger # cat events/synthetic/block_lat/hist # event histogram # - # trigger info: hist:keys=delta.buckets=100,stacktrace:vals=hitcount:sort=delta.buckets=100:size=2048 [active] + # trigger info: hist:keys=delta.buckets=100,stack.stacktrace:vals=hitcount:sort=delta.buckets=100:size=2048 [active] # - - { delta: ~ 0-99, stacktrace: - event_hist_trigger+0x464/0x480 - event_triggers_call+0x52/0xe0 - trace_event_buffer_commit+0x193/0x250 - trace_event_raw_event_sched_switch+0xfc/0x150 - __traceiter_sched_switch+0x41/0x60 - __schedule+0x448/0x7b0 - schedule_idle+0x26/0x40 - cpu_startup_entry+0x19/0x20 - start_secondary+0xed/0xf0 - secondary_startup_64_no_verify+0xe0/0xeb - } hitcount: 6 - { delta: ~ 0-99, stacktrace: - event_hist_trigger+0x464/0x480 - event_triggers_call+0x52/0xe0 - trace_event_buffer_commit+0x193/0x250 - trace_event_raw_event_sched_switch+0xfc/0x150 - __traceiter_sched_switch+0x41/0x60 - __schedule+0x448/0x7b0 - schedule_idle+0x26/0x40 - cpu_startup_entry+0x19/0x20 - __pfx_kernel_init+0x0/0x10 - arch_call_rest_init+0xa/0x24 - start_kernel+0x964/0x98d - secondary_startup_64_no_verify+0xe0/0xeb + { delta: ~ 0-99, stack.stacktrace __schedule+0xa19/0x1520 + schedule+0x6b/0x110 + io_schedule+0x46/0x80 + bit_wait_io+0x11/0x80 + __wait_on_bit+0x4e/0x120 + out_of_line_wait_on_bit+0x8d/0xb0 + __wait_on_buffer+0x33/0x40 + jbd2_journal_commit_transaction+0x155a/0x19b0 + kjournald2+0xab/0x270 + kthread+0xfa/0x130 + ret_from_fork+0x29/0x50 + } hitcount: 1 + { delta: ~ 0-99, stack.stacktrace __schedule+0xa19/0x1520 + schedule+0x6b/0x110 + io_schedule+0x46/0x80 + rq_qos_wait+0xd0/0x170 + wbt_wait+0x9e/0xf0 + __rq_qos_throttle+0x25/0x40 + blk_mq_submit_bio+0x2c3/0x5b0 + __submit_bio+0xff/0x190 + submit_bio_noacct_nocheck+0x25b/0x2b0 + submit_bio_noacct+0x20b/0x600 + submit_bio+0x28/0x90 + ext4_bio_write_page+0x1e0/0x8c0 + mpage_submit_page+0x60/0x80 + mpage_process_page_bufs+0x16c/0x180 + mpage_prepare_extent_to_map+0x23f/0x530 + } hitcount:
1 + { delta: ~ 0-99, stack.stacktrace __schedule+0xa19/0x1520 + schedule+0x6b/0x110 + schedule_hrtimeout_range_clock+0x97/0x110 + schedule_hrtimeout_range+0x13/0x20 + usleep_range_state+0x65/0x90 + __intel_wait_for_register+0x1c1/0x230 [i915] + intel_psr_wait_for_idle_locked+0x171/0x2a0 [i915] + intel_pipe_update_start+0x169/0x360 [i915] + intel_update_crtc+0x112/0x490 [i915] + skl_commit_modeset_enables+0x199/0x600 [i915] + intel_atomic_commit_tail+0x7c4/0x1080 [i915] + intel_atomic_commit_work+0x12/0x20 [i915] + process_one_work+0x21c/0x3f0 + worker_thread+0x50/0x3e0 + kthread+0xfa/0x130 } hitcount: 3 - { delta: ~ 0-99, stacktrace: - event_hist_trigger+0x464/0x480 - event_triggers_call+0x52/0xe0 - trace_event_buffer_commit+0x193/0x250 - trace_event_raw_event_sched_switch+0xfc/0x150 - __traceiter_sched_switch+0x41/0x60 - __schedule+0x448/0x7b0 - schedule+0x5a/0xb0 - worker_thread+0xaf/0x380 - kthread+0xe9/0x110 - ret_from_fork+0x2c/0x50 + { delta: ~ 0-99, stack.stacktrace __schedule+0xa19/0x1520 + schedule+0x6b/0x110 + schedule_timeout+0x11e/0x160 + __wait_for_common+0x8f/0x190 + wait_for_completion+0x24/0x30 + __flush_work.isra.0+0x1cc/0x360 + flush_work+0xe/0x20 + drm_mode_rmfb+0x18b/0x1d0 [drm] + drm_mode_rmfb_ioctl+0x10/0x20 [drm] + drm_ioctl_kernel+0xb8/0x150 [drm] + drm_ioctl+0x243/0x560 [drm] + __x64_sys_ioctl+0x92/0xd0 + do_syscall_64+0x59/0x90 + entry_SYSCALL_64_after_hwframe+0x72/0xdc } hitcount: 1 - { delta: ~ 100-199, stacktrace: - event_hist_trigger+0x464/0x480 - event_triggers_call+0x52/0xe0 - trace_event_buffer_commit+0x193/0x250 - trace_event_raw_event_sched_switch+0xfc/0x150 - __traceiter_sched_switch+0x41/0x60 - __schedule+0x448/0x7b0 - schedule_idle+0x26/0x40 - cpu_startup_entry+0x19/0x20 - start_secondary+0xed/0xf0 - secondary_startup_64_no_verify+0xe0/0xeb - } hitcount: 15 - [..] 
- { delta: ~ 8500-8599, stacktrace: - event_hist_trigger+0x464/0x480 - event_triggers_call+0x52/0xe0 - trace_event_buffer_commit+0x193/0x250 - trace_event_raw_event_sched_switch+0xfc/0x150 - __traceiter_sched_switch+0x41/0x60 - __schedule+0x448/0x7b0 - schedule_idle+0x26/0x40 - cpu_startup_entry+0x19/0x20 - start_secondary+0xed/0xf0 - secondary_startup_64_no_verify+0xe0/0xeb + { delta: ~ 0-99, stack.stacktrace __schedule+0xa19/0x1520 + schedule+0x6b/0x110 + schedule_timeout+0x87/0x160 + __wait_for_common+0x8f/0x190 + wait_for_completion_timeout+0x1d/0x30 + drm_atomic_helper_wait_for_flip_done+0x57/0x90 [drm_kms_helper] + intel_atomic_commit_tail+0x8ce/0x1080 [i915] + intel_atomic_commit_work+0x12/0x20 [i915] + process_one_work+0x21c/0x3f0 + worker_thread+0x50/0x3e0 + kthread+0xfa/0x130 + ret_from_fork+0x29/0x50 + } hitcount: 1 + { delta: ~ 100-199, stack.stacktrace __schedule+0xa19/0x1520 + schedule+0x6b/0x110 + schedule_hrtimeout_range_clock+0x97/0x110 + schedule_hrtimeout_range+0x13/0x20 + usleep_range_state+0x65/0x90 + pci_set_low_power_state+0x17f/0x1f0 + pci_set_power_state+0x49/0x250 + pci_finish_runtime_suspend+0x4a/0x90 + pci_pm_runtime_suspend+0xcb/0x1b0 + __rpm_callback+0x48/0x120 + rpm_callback+0x67/0x70 + rpm_suspend+0x167/0x780 + rpm_idle+0x25a/0x380 + pm_runtime_work+0x93/0xc0 + process_one_work+0x21c/0x3f0 } hitcount: 1 Totals: - Hits: 89 - Entries: 11 - Dropped: 0 + Hits: 10 + Entries: 7 + Dropped: 0 2.2.3 Hist trigger 'handlers' and 'actions' ------------------------------------------- -- cgit v1.2.1 From 2455f0e124d317dd08d337a7550a78a224d4ba41 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Wed, 15 Feb 2023 15:33:45 -0700 Subject: tracing: Always use canonical ftrace path The canonical location for the tracefs filesystem is at /sys/kernel/tracing. But, from Documentation/trace/ftrace.rst: Before 4.1, all ftrace tracing control files were within the debugfs file system, which is typically located at /sys/kernel/debug/tracing. For backward compatibility, when mounting the debugfs file system, the tracefs file system will be automatically mounted at: /sys/kernel/debug/tracing Many comments and Kconfig help messages in the tracing code still refer to this older debugfs path, so let's update them to avoid confusion. Link: https://lore.kernel.org/linux-trace-kernel/20230215223350.2658616-2-zwisler@google.com Acked-by: Masami Hiramatsu (Google) Reviewed-by: Mukesh Ojha Signed-off-by: Ross Zwisler Signed-off-by: Steven Rostedt (Google) --- include/linux/kernel.h | 2 +- include/linux/tracepoint.h | 4 ++-- kernel/trace/Kconfig | 20 ++++++++++---------- kernel/trace/kprobe_event_gen_test.c | 2 +- kernel/trace/ring_buffer.c | 2 +- kernel/trace/synth_event_gen_test.c | 2 +- kernel/trace/trace.c | 2 +- samples/user_events/example.c | 4 ++-- scripts/tracing/draw_functrace.py | 6 +++--- tools/lib/api/fs/tracing_path.c | 4 ++-- tools/tracing/latency/latency-collector.c | 2 +- 11 files changed, 25 insertions(+), 25 deletions(-) diff --git a/include/linux/kernel.h b/include/linux/kernel.h index fe6efb24d151..40bce7495af8 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -297,7 +297,7 @@ bool mac_pton(const char *s, u8 *mac); * * Use tracing_on/tracing_off when you want to quickly turn on or off * tracing. It simply enables or disables the recording of the trace events. 
- * This also corresponds to the user space /sys/kernel/debug/tracing/tracing_on + * This also corresponds to the user space /sys/kernel/tracing/tracing_on * file, which gives a means for the kernel and userspace to interact. * Place a tracing_off() in the kernel where you want tracing to end. * From user space, examine the trace, and then echo 1 > tracing_on diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 4b33b95eb8be..fa1004fcf810 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -471,7 +471,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) * * This is how the trace record is structured and will * * be saved into the ring buffer. These are the fields * * that will be exposed to user-space in - * * /sys/kernel/debug/tracing/events/<*>/format. + * * /sys/kernel/tracing/events/<*>/format. * * * * The declared 'local variable' is called '__entry' * * @@ -531,7 +531,7 @@ static inline struct tracepoint *tracepoint_ptr_deref(tracepoint_ptr_t *p) * tracepoint callback (this is used by programmatic plugins and * can also by used by generic instrumentation like SystemTap), and * it is also used to expose a structured trace record in - * /sys/kernel/debug/tracing/events/. + * /sys/kernel/tracing/events/. * * A set of (un)registration functions can be passed to the variant * TRACE_EVENT_FN to perform any (un)registration work. diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index d7043043f59c..5f5e64f9e715 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -239,7 +239,7 @@ config DYNAMIC_FTRACE enabled, and the functions not enabled will not affect performance of the system. - See the files in /sys/kernel/debug/tracing: + See the files in /sys/kernel/tracing: available_filter_functions set_ftrace_filter set_ftrace_notrace @@ -299,7 +299,7 @@ config STACK_TRACER select KALLSYMS help This special tracer records the maximum stack footprint of the - kernel and displays it in /sys/kernel/debug/tracing/stack_trace. + kernel and displays it in /sys/kernel/tracing/stack_trace. This tracer works by hooking into every function call that the kernel executes, and keeping a maximum stack depth value and @@ -339,7 +339,7 @@ config IRQSOFF_TRACER disabled by default and can be runtime (re-)started via: - echo 0 > /sys/kernel/debug/tracing/tracing_max_latency + echo 0 > /sys/kernel/tracing/tracing_max_latency (Note that kernel size and overhead increase with this option enabled. This option and the preempt-off timing option can be @@ -363,7 +363,7 @@ config PREEMPT_TRACER disabled by default and can be runtime (re-)started via: - echo 0 > /sys/kernel/debug/tracing/tracing_max_latency + echo 0 > /sys/kernel/tracing/tracing_max_latency (Note that kernel size and overhead increase with this option enabled. This option and the irqs-off timing option can be @@ -515,7 +515,7 @@ config TRACER_SNAPSHOT Allow tracing users to take snapshot of the current buffer using the ftrace interface, e.g.: - echo 1 > /sys/kernel/debug/tracing/snapshot + echo 1 > /sys/kernel/tracing/snapshot cat snapshot config TRACER_SNAPSHOT_PER_CPU_SWAP @@ -527,7 +527,7 @@ config TRACER_SNAPSHOT_PER_CPU_SWAP full swap (all buffers). If this is set, then the following is allowed: - echo 1 > /sys/kernel/debug/tracing/per_cpu/cpu2/snapshot + echo 1 > /sys/kernel/tracing/per_cpu/cpu2/snapshot After which, only the tracing buffer for CPU 2 was swapped with the main tracing buffer, and the other CPU buffers remain the same. 
@@ -574,7 +574,7 @@ config PROFILE_ANNOTATED_BRANCHES This tracer profiles all likely and unlikely macros in the kernel. It will display the results in: - /sys/kernel/debug/tracing/trace_stat/branch_annotated + /sys/kernel/tracing/trace_stat/branch_annotated Note: this will add a significant overhead; only turn this on if you need to profile the system's use of these macros. @@ -587,7 +587,7 @@ config PROFILE_ALL_BRANCHES taken in the kernel is recorded whether it hit or miss. The results will be displayed in: - /sys/kernel/debug/tracing/trace_stat/branch_all + /sys/kernel/tracing/trace_stat/branch_all This option also enables the likely/unlikely profiler. @@ -638,8 +638,8 @@ config BLK_DEV_IO_TRACE Tracing also is possible using the ftrace interface, e.g.: echo 1 > /sys/block/sda/sda1/trace/enable - echo blk > /sys/kernel/debug/tracing/current_tracer - cat /sys/kernel/debug/tracing/trace_pipe + echo blk > /sys/kernel/tracing/current_tracer + cat /sys/kernel/tracing/trace_pipe If unsure, say N. diff --git a/kernel/trace/kprobe_event_gen_test.c b/kernel/trace/kprobe_event_gen_test.c index c736487fc0e4..4850fdfe27f1 100644 --- a/kernel/trace/kprobe_event_gen_test.c +++ b/kernel/trace/kprobe_event_gen_test.c @@ -21,7 +21,7 @@ * Then: * * # insmod kernel/trace/kprobe_event_gen_test.ko - * # cat /sys/kernel/debug/tracing/trace + * # cat /sys/kernel/tracing/trace * * You should see many instances of the "gen_kprobe_test" and * "gen_kretprobe_test" events in the trace buffer. diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 45d4a23d6044..071184324d18 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2886,7 +2886,7 @@ rb_check_timestamp(struct ring_buffer_per_cpu *cpu_buffer, sched_clock_stable() ? "" : "If you just came from a suspend/resume,\n" "please switch to the trace global clock:\n" - " echo global > /sys/kernel/debug/tracing/trace_clock\n" + " echo global > /sys/kernel/tracing/trace_clock\n" "or add trace_clock=global to the kernel command line\n"); } diff --git a/kernel/trace/synth_event_gen_test.c b/kernel/trace/synth_event_gen_test.c index 8d77526892f4..8dfe85499d4a 100644 --- a/kernel/trace/synth_event_gen_test.c +++ b/kernel/trace/synth_event_gen_test.c @@ -22,7 +22,7 @@ * Then: * * # insmod kernel/trace/synth_event_gen_test.ko - * # cat /sys/kernel/debug/tracing/trace + * # cat /sys/kernel/tracing/trace * * You should see several events in the trace buffer - * "create_synth_test", "empty_synth_test", and several instances of diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 97c88711f270..fbb602a8b64b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1187,7 +1187,7 @@ void tracing_snapshot_instance(struct trace_array *tr) * * Note, make sure to allocate the snapshot with either * a tracing_snapshot_alloc(), or by doing it manually - * with: echo 1 > /sys/kernel/debug/tracing/snapshot + * with: echo 1 > /sys/kernel/tracing/snapshot * * If the snapshot buffer is not allocated, it will stop tracing. * Basically making a permanent snapshot. 
diff --git a/samples/user_events/example.c b/samples/user_events/example.c
index d06dc24156ec..18e34c9d708e 100644
--- a/samples/user_events/example.c
+++ b/samples/user_events/example.c
@@ -23,8 +23,8 @@
 #endif

 /* Assumes debugfs is mounted */
-const char *data_file = "/sys/kernel/debug/tracing/user_events_data";
-const char *status_file = "/sys/kernel/debug/tracing/user_events_status";
+const char *data_file = "/sys/kernel/tracing/user_events_data";
+const char *status_file = "/sys/kernel/tracing/user_events_status";

 static int event_status(long **status)
 {
diff --git a/scripts/tracing/draw_functrace.py b/scripts/tracing/draw_functrace.py
index 438516bdfb3c..42fa87300941 100755
--- a/scripts/tracing/draw_functrace.py
+++ b/scripts/tracing/draw_functrace.py
@@ -12,9 +12,9 @@ calls. Only the functions's names and the call time are provided.

 Usage:
 	Be sure that you have CONFIG_FUNCTION_TRACER
-	# mount -t debugfs nodev /sys/kernel/debug
-	# echo function > /sys/kernel/debug/tracing/current_tracer
-	$ cat /sys/kernel/debug/tracing/trace_pipe > ~/raw_trace_func
+	# mount -t tracefs nodev /sys/kernel/tracing
+	# echo function > /sys/kernel/tracing/current_tracer
+	$ cat /sys/kernel/tracing/trace_pipe > ~/raw_trace_func
 	Wait some times but not too much, the script is a bit slow.
 	Break the pipe (Ctrl + Z)
 	$ scripts/tracing/draw_functrace.py < ~/raw_trace_func > draw_functrace
diff --git a/tools/lib/api/fs/tracing_path.c b/tools/lib/api/fs/tracing_path.c
index b8e457c841ab..7ba3e81274e8 100644
--- a/tools/lib/api/fs/tracing_path.c
+++ b/tools/lib/api/fs/tracing_path.c
@@ -14,8 +14,8 @@
 #include "tracing_path.h"

 static char tracing_mnt[PATH_MAX] = "/sys/kernel/debug";
-static char tracing_path[PATH_MAX] = "/sys/kernel/debug/tracing";
-static char tracing_events_path[PATH_MAX] = "/sys/kernel/debug/tracing/events";
+static char tracing_path[PATH_MAX] = "/sys/kernel/tracing";
+static char tracing_events_path[PATH_MAX] = "/sys/kernel/tracing/events";

 static void __tracing_path_set(const char *tracing, const char *mountpoint)
 {
diff --git a/tools/tracing/latency/latency-collector.c b/tools/tracing/latency/latency-collector.c
index 59a7f2346eab..0fd9c747d396 100644
--- a/tools/tracing/latency/latency-collector.c
+++ b/tools/tracing/latency/latency-collector.c
@@ -1584,7 +1584,7 @@ static void *do_printloop(void *arg)
 	/*
 	 * Toss a coin to decide if we want to sleep before printing
 	 * out the backtrace. The reason for this is that opening
-	 * /sys/kernel/debug/tracing/trace will cause a blackout of
+	 * /sys/kernel/tracing/trace will cause a blackout of
 	 * hundreds of ms, where no latencies will be noted by the
 	 * latency tracer. Thus by randomly sleeping we try to avoid
 	 * missing traces systematically due to this. With this option
--
cgit v1.2.1


From e7bb66f79a7b19a47b3eff745ea9f7ba1ae76032 Mon Sep 17 00:00:00 2001
From: Jianlin Lv
Date: Wed, 2 Nov 2022 16:02:36 +0000
Subject: tracepoint: Allow livepatch modules to add trace events

When the system must be kept running, the preferred method for tracing
the kernel is dynamic tracing (kprobes), but the drawback of this method
is that events can be lost, especially when tracing packets in the
network stack.

Livepatching provides a potential solution: reimplement the function you
want to trace and insert a static tracepoint into the replacement. In
this way, custom, stable static tracepoints can be added without
rebooting the system.
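To make this concrete, here is a minimal, hypothetical sketch of such a
livepatch module, modeled on samples/livepatch/livepatch-sample.c. The
patched function name (do_work) and the tracepoint call are illustrative
placeholders only, not part of this patch; a real module would also need
the usual TRACE_EVENT() header boilerplate to define the tracepoint:

    #include <linux/module.h>
    #include <linux/livepatch.h>

    /* Reimplementation of the (hypothetical) vmlinux function "do_work",
     * with a static tracepoint added. trace_my_subsys_work() stands in
     * for a tracepoint defined via TRACE_EVENT(), omitted for brevity. */
    static void livepatch_do_work(void)
    {
    	/* trace_my_subsys_work(arg);  -- fire the new static tracepoint */
    	/* ... original do_work() logic, reimplemented here ... */
    }

    static struct klp_func funcs[] = {
    	{
    		.old_name = "do_work",
    		.new_func = livepatch_do_work,
    	}, { }
    };

    static struct klp_object objs[] = {
    	{
    		/* name left NULL: the patched function lives in vmlinux */
    		.funcs = funcs,
    	}, { }
    };

    static struct klp_patch patch = {
    	.mod = THIS_MODULE,
    	.objs = objs,
    };

    static int livepatch_init(void)
    {
    	return klp_enable_patch(&patch);
    }

    static void livepatch_exit(void)
    {
    }

    module_init(livepatch_init);
    module_exit(livepatch_exit);
    MODULE_LICENSE("GPL");
    MODULE_INFO(livepatch, "Y");

Loading such a module sets TAINT_LIVEPATCH, which is why the change below
adds that taint to the set trace_module_has_bad_taint() tolerates;
without it, the module's trace events would be rejected at module load.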
Link: https://lkml.kernel.org/r/20221102160236.11696-1-iecedge@gmail.com

Signed-off-by: Jianlin Lv
Signed-off-by: Steven Rostedt (Google)
---
 kernel/tracepoint.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c
index f23144af5743..8d1507dd0724 100644
--- a/kernel/tracepoint.c
+++ b/kernel/tracepoint.c
@@ -571,8 +571,8 @@ static void for_each_tracepoint_range(
 bool trace_module_has_bad_taint(struct module *mod)
 {
 	return mod->taints & ~((1 << TAINT_OOT_MODULE) | (1 << TAINT_CRAP) |
-			       (1 << TAINT_UNSIGNED_MODULE) |
-			       (1 << TAINT_TEST));
+			       (1 << TAINT_UNSIGNED_MODULE) | (1 << TAINT_TEST) |
+			       (1 << TAINT_LIVEPATCH));
 }

 static BLOCKING_NOTIFIER_HEAD(tracepoint_notify_list);
--
cgit v1.2.1


From 7568a21e52f60930ba8ae7897c2521bdab3ef5a4 Mon Sep 17 00:00:00 2001
From: Wang ShaoBo
Date: Wed, 23 Nov 2022 14:51:24 +0800
Subject: tracing: Remove unnecessary NULL assignment

Remove an unnecessary NULL assignment in create_new_subsystem().

Link: https://lkml.kernel.org/r/20221123065124.3982439-1-bobo.shaobowang@huawei.com

Signed-off-by: Wang ShaoBo
Signed-off-by: Steven Rostedt (Google)
---
 kernel/trace/trace_events.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index cf3fd74fa675..b52c4d79eb78 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -2261,8 +2261,6 @@ create_new_subsystem(const char *name)
 	if (!system->name)
 		goto out_free;

-	system->filter = NULL;
-
 	system->filter = kzalloc(sizeof(struct event_filter), GFP_KERNEL);
 	if (!system->filter)
 		goto out_free;
--
cgit v1.2.1
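For context on this last patch: the removed line was a dead store, since
the pointer is reassigned on the very next line, and kzalloc() returns
zeroed memory anyway, so no separate initialization is needed. A minimal
userspace rendering of the pattern, with calloc() standing in for
kzalloc() and a trimmed-down, illustrative struct layout:

    #include <stdio.h>
    #include <stdlib.h>

    struct event_filter { char *filter_string; };
    struct event_subsystem { struct event_filter *filter; };

    int main(void)
    {
    	struct event_subsystem sys;

    	/* Redundant form: the NULL store is dead, because the calloc()
    	 * result overwrites it immediately (kzalloc() plays the same
    	 * role in create_new_subsystem()). */
    	sys.filter = NULL;
    	sys.filter = calloc(1, sizeof(*sys.filter));
    	if (!sys.filter)
    		return 1;

    	/* calloc(), like kzalloc(), zero-fills: filter_string is NULL. */
    	printf("filter_string = %p\n", (void *)sys.filter->filter_string);
    	free(sys.filter);
    	return 0;
    }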