Kaynağa Gözat

Feature #TASK_QT-18250 跨线程追踪

Carl 2 ay önce
ebeveyn
işleme
44d04eaaee

+ 11 - 0
ebpftracer/ebpf/include/apm_trace.h

@@ -53,6 +53,15 @@
 #define MAX_MQ_TOPIC_SIZE 256  // Max MQ topic size (e.g., Kafka topic)
 #define MAX_MQ_KEY_SIZE 256     // Max MQ key size (e.g., Kafka message key)
 
+/***********************************************************
+ * Trace Info Type Definitions
+ * Identify which lookup source produced a trace_info.
+ ***********************************************************/
+#define APM_TRACE_INFO_TYPE_FIRST_FOUND     1  // first hit: found in trace_info_heap (via goid)
+#define APM_TRACE_INFO_TYPE_REUSE_FOUND     2  // reuse hit: found in goid_trace_info_heap (second or later lookup)
+#define APM_TRACE_INFO_TYPE_THREAD_FOUND   3  // found in thread_trace_info_heap (via tgid_pid)
+#define APM_TRACE_INFO_TYPE_CTX_FOUND       4  // found through the trace_key stored in thread_ctx
+
 /***********************************************************
  * Trace struct
  ***********************************************************/
@@ -181,5 +190,7 @@ static __always_inline void span_context_to_cw_string_stream(struct apm_span_con
 	(e)->trace_start = 0; \
 	(e)->trace_end = 0; \
 } while (0)
+// Forward declaration so code that includes this header can call the
+// lookup before its definition. Takes the trace key by value and returns
+// a pointer to the matching trace info.
+// NOTE(review): presumably returns NULL when nothing matches (the
+// definition is not visible here) - callers should NULL-check the result.
+static __inline __attribute__((__always_inline__))
+struct apm_trace_info_t *get_apm_trace_info_by_trace_key(struct apm_trace_key_t trace_key);
 
 #endif //EUSPACES_APM_TRACE_H

+ 269 - 10
ebpftracer/ebpf/l7/apm_trace.c

@@ -1,6 +1,45 @@
 //
 // Created by Carl.Guo on 2024/4/1.
 //
+/*
+ * Design notes (pseudo-code):
+ *
+ * l7_request_key {
+ *     __u64 fd;
+ *     __u32 pid;
+ *     __u16 is_tls;
+ *     __s16 stream_id;
+ *     __u32 id;
+ * }
+ * l7_request_key_start { __u64 fd; __u32 pid; }  value = id
+ * l7_request_key_end   { __u64 fd; __u32 pid; }  value = id
+ *
+ * Every trigger increments id, and that id then becomes part of
+ * l7_request_key.
+ *
+ * When a Redis end event fires, the value of l7_request_key_end is
+ * incremented, and l7_request_key is then looked up with id set to the
+ * newly incremented value.
+ *
+ * if redis start {
+ *     l7_request_key_start { __u64 fd; __u32 pid; }  value = id
+ *
+ *     start count++
+ *
+ *     update l7_request_key
+ * }
+ *
+ * if redis end {
+ *     read the current end total from l7_request_key_end
+ *     end count + 1, build l7_request_key, fetch the stored info
+ *
+ *     // clean current
+ *     clean l7_request_key where id = end
+ *
+ *     read the l7_request_key_start info
+ *     // clean global
+ *     if start count == end count {
+ *         clean l7_request_key_start && l7_request_key_end
+ *     }
+ * }
+ */
+#include "apm_trace.h"
 
 struct {
 	__uint(type, BPF_MAP_TYPE_LRU_HASH);
@@ -137,6 +176,132 @@ struct {
 	__uint(max_entries, 1);
 } apm_span_context_heap SEC(".maps");
 
+
+// ---------- Tunable parameters ----------
+// NOTE(review): TTL_MS is not referenced by any code visible in this
+// change; the expiry values seen here are written as literals - confirm
+// whether they are meant to use this constant.
+#define TTL_MS              3600000ULL     // default lifetime of contexts and edges (1 hour, in milliseconds)
+#define LAST_WAKE_WINS      0               // 0: the first waker wins; 1: the most recent waker wins (uses a lock)
+
+// Current time in nanoseconds, as reported by bpf_ktime_get_ns().
+static __always_inline __u64 now_ns(void)
+{
+	return bpf_ktime_get_ns();
+}
+
+// Convert a duration in milliseconds to nanoseconds.
+static __always_inline __u64 ms_to_ns(__u64 ms)
+{
+	const __u64 ns_per_ms = 1000ULL * 1000ULL;
+
+	return ms * ns_per_ms;
+}
+
+// ---------- Event structs (minimal subset of the tracepoint payload) ----------
+// Layout source: /sys/kernel/tracing/events/sched/sched_wakeup/format
+// Note: the tracepoint data starts at offset 8; the first 8 bytes are the
+// common fields, which this struct does not include.
+// NOTE(review): because the struct begins at comm, it lines up with the
+// record only when copied from (ctx + 8) - confirm at every read site.
+struct tp_sched_wakeup {
+	char   comm[16];     // offset 8-23
+	__u32  pid;          // offset 24-27 (wakee tid)
+	__u32  prio;         // offset 28-31
+	__u32  target_cpu;   // offset 32-35
+};
+
+// Layout source: /sys/kernel/tracing/events/sched/sched_switch/format
+// Note: the tracepoint data starts at offset 8; the first 8 bytes are the
+// common fields, which this struct does not include.
+// NOTE(review): because the struct begins at prev_comm, it lines up with
+// the record only when copied from (ctx + 8) - confirm at every read site.
+struct tp_sched_switch {
+	char   prev_comm[16];  // offset 8-23
+	__u32  prev_pid;       // offset 24-27
+	__u32  prev_prio;      // offset 28-31
+	__s64  prev_state;     // offset 32-39
+	char   next_comm[16];  // offset 40-55
+	__u32  next_pid;       // offset 56-59 (tid about to run)
+	__u32  next_prio;      // offset 60-63
+};
+
+// ---------- Per-thread business context (thread_ctx) ----------
+// Value type of the thread_ctx map, which is keyed by tid.
+struct thread_ctx_t {
+	__u64 token;         // trace token / trace_id (written at the trace entry point)
+	__u64 ts_ns;         // time of the most recent refresh
+	__u64 exp_ns;        // expiry time (now + TTL)
+	__u32 root_thread;   // main (root) thread tid
+	__u32 tgid;          // owning process
+	__u32 parent_tid;    // parent thread (for debugging / back-tracing)
+	__u8  is_main_thread;// main-thread flag (set_root_ctx writes 1)
+	__u16 level;         // inheritance depth
+	struct apm_trace_info_t *trace_info; // NOTE(review): set_root_ctx leaves this zeroed - confirm whether the field is still needed
+	struct apm_trace_key_t trace_key;    // trace key copied from the originating trace_info
+};
+
+// ---------- waker -> wakee "edge" ----------
+// Value type of the wake_edge map, which is keyed by the wakee tid.
+struct edge_t {
+#if LAST_WAKE_WINS
+	struct bpf_spin_lock lock; // only used under the "most recent waker wins" policy
+#endif
+	__u64 token;         // token inherited from the waker
+	__u64 ts_ns;         // wake-up timestamp (used for the "most recent" comparison)
+	__u64 exp_ns;        // edge expiry time (the edge may be rebuilt once expired)
+	__u32 parent_tid;    // waker tid (the parent)
+};
+
+// thread_ctx: per-thread context, keyed by tid.
+// LRU hash, so old entries can be evicted once the map reaches max_entries.
+struct {
+	__uint(type, BPF_MAP_TYPE_LRU_HASH);
+	__type(key, __u32);              // tid
+	__type(value, struct thread_ctx_t);
+	__uint(max_entries, 65535);
+} thread_ctx SEC(".maps");
+
+// wake_edge: temporary storage for an "edge", keyed by the wakee tid.
+// NOTE(review): nothing visible in this change reads or writes this map -
+// confirm it is still used.
+struct {
+	__uint(type, BPF_MAP_TYPE_LRU_HASH);
+	__type(key, __u32);              // wakee tid
+	__type(value, struct edge_t);
+	__uint(max_entries, 65535);
+} wake_edge SEC(".maps");
+
+// tgid_root_thread: maps a tgid to the tid of its root thread.
+// Used as a fallback to find the root thread context of the same process
+// when a thread cannot be linked through sched_wakeup/sched_switch.
+// Holds a single root tid per tgid, so a later write for the same tgid
+// replaces the earlier one.
+struct {
+	__uint(type, BPF_MAP_TYPE_LRU_HASH);
+	__type(key, __u32);              // tgid
+	__type(value, __u32);           // root_thread tid
+	__uint(max_entries, 1024);
+} tgid_root_thread SEC(".maps");
+
+// ---------- Entry helper to call at "server trace start" ----------
+// Note: this is only a sketch of the uprobe/uretprobe approach; the real
+// entry point should call it from the traced user function.
+//
+// Registers a thread as the root of a trace:
+//   - writes a fresh thread_ctx_t for the tid, replacing any existing
+//     entry (BPF_ANY);
+//   - records tgid -> tid in tgid_root_thread for fallback lookups.
+//
+// t          timestamp in ns used as refresh time and as expiry base
+//            (presumably from bpf_ktime_get_ns - confirm at the caller)
+// pidtgid    packed id: tgid in the upper 32 bits, tid in the lower 32
+// trace_info source of the token (trace_id) and of the trace_key
+//
+// Always returns 0; the results of the map updates are not checked.
+static __always_inline int set_root_ctx(__u64 t, __u64 pidtgid, struct apm_trace_info_t trace_info) {
+	__u32 root_tid = (__u32) pidtgid;
+	__u32 owner_tgid = (__u32) (pidtgid >> 32);
+	// Zero the whole value first, then fill the fields one by one.
+	struct thread_ctx_t root = {};
+
+	// identity of the root thread
+	root.tgid = owner_tgid;
+	root.root_thread = root_tid;
+	root.parent_tid = 0;
+	root.is_main_thread = 1;
+	root.level = 1;
+
+	// link back to the trace
+	root.token = trace_info.trace_id;
+	root.trace_key = trace_info.trace_key;
+
+	// lifetime
+	root.ts_ns = t;
+	root.exp_ns = t + 30ULL * 1000 * 1000; // 30ms
+
+	bpf_map_update_elem(&thread_ctx, &root_tid, &root, BPF_ANY);
+	// Remember tgid -> root thread so other threads can fall back to it.
+	bpf_map_update_elem(&tgid_root_thread, &owner_tgid, &root_tid, BPF_ANY);
+	return 0;
+}
+
+//// 线程上下文结构体
+//struct thread_ctx_t {
+//	__u64 token;
+//	__u32 parent_tid;
+//	__u64 ts_ns;
+//	__u64 exp_ns;
+//	__u32 tgid;
+//};
+//
+//// 线程上下文映射
+//struct {
+//	__uint(type, BPF_MAP_TYPE_LRU_HASH);
+//	__uint(key_size, sizeof(__u32));
+//	__uint(value_size, sizeof(struct thread_ctx_t));
+//	__uint(max_entries, 65536);
+//} thread_ctx SEC(".maps");
+//
+//struct tp_sched_wakeup {
+//	__u64 pad;          // trace_entry 8字节
+//	char  comm[16];     // 8..23
+//	int   pid;          // 24..27  ← wakee_tid
+//	int   prio;
+//	int   target_cpu;
+//};
+
 static __inline __attribute__((__always_inline__))
 struct apm_trace_key_t get_apm_trace_key(__u64 timeout, bool is_socket_io) {
 	__u64 pid_tgid = bpf_get_current_pid_tgid();
@@ -195,6 +360,62 @@ __u64 get_apm_trace_id(__u32 pid, __u32 tid) {
 	if (trace_info) {
 		cw_bpf_debug("info_trace_id:%llu", trace_info->trace_id);
 		return trace_info->trace_id;
+	} else {
+		struct thread_ctx_t *current_ctx = bpf_map_lookup_elem(&thread_ctx, &tid);
+		if (current_ctx) {
+			bpf_printk("    [Redis] has context:  %u->%u token=%llu", tid, current_ctx->parent_tid, current_ctx->token);
+			bpf_printk("    [Redis] has context:  %u->%u token=%llu", tid, current_ctx->root_thread, current_ctx->token);
+			bpf_printk("    [Redis] has context:  trace_key tid:%d,pid", current_ctx->trace_key.pid);
+			struct thread_ctx_t *has_root_ctx = bpf_map_lookup_elem(&thread_ctx, &current_ctx->trace_key.pid);
+			if (!has_root_ctx) {
+				bpf_printk("thead is inactive.");
+				// 线程没有 context,尝试回退查找同一进程的 root thread context
+				// 这解决了仅依赖 sched_wakeup/sched_switch 无法关联的问题
+				bpf_printk("    [R1] tid:%d has NO context, trying fallback: pid=%u (tgid)", tid, pid);
+				// 通过 tgid 查找 root thread
+				__u32 *root_tid_ptr = bpf_map_lookup_elem(&tgid_root_thread, &pid);
+				if (root_tid_ptr) {
+					__u32 root_tid = *root_tid_ptr;
+					bpf_printk("    [R2] Found root thread: tgid=%u -> root_tid=%u", pid, root_tid);
+
+					// 通过 root thread 查找 context
+					struct thread_ctx_t *root_ctx = bpf_map_lookup_elem(&thread_ctx, &root_tid);
+//					if (root_ctx ) {
+					if (root_ctx && root_ctx->exp_ns > bpf_ktime_get_ns()) {
+						bpf_printk("    [R3] Found root context: token=%llu", root_ctx->token);
+						return root_ctx->token;
+					} else {
+						bpf_printk("    [R] Root context not found or expired for root_tid=%u", root_tid);
+					}
+				} else {
+					bpf_printk("    [R4] No root thread mapping found for tgid=%u", pid);
+					bpf_printk("    [R5] This thread may have been created before trace start or was not captured by sched_wakeup");
+				}
+			}
+			return current_ctx->token;
+		} else {
+			// 线程没有 context,尝试回退查找同一进程的 root thread context
+			// 这解决了仅依赖 sched_wakeup/sched_switch 无法关联的问题
+			bpf_printk("    [R6] tid:%d has NO context, trying fallback: pid=%u (tgid)", tid, pid);
+			// 通过 tgid 查找 root thread
+			__u32 *root_tid_ptr = bpf_map_lookup_elem(&tgid_root_thread, &pid);
+			if (root_tid_ptr) {
+				__u32 root_tid = *root_tid_ptr;
+				bpf_printk("    [R7] Found root thread: tgid=%u -> root_tid=%u", pid, root_tid);
+
+				// 通过 root thread 查找 context
+				struct thread_ctx_t *root_ctx = bpf_map_lookup_elem(&thread_ctx, &root_tid);
+				if (root_ctx && root_ctx->exp_ns > bpf_ktime_get_ns()) {
+					bpf_printk("    [R8] Found root context: token=%llu", root_ctx->token);
+					return root_ctx->token;
+				} else {
+					bpf_printk("    [R9] Root context not found or expired for root_tid=%u", root_tid);
+				}
+			} else {
+				bpf_printk("    [R10] No root thread mapping found for tgid=%u", pid);
+				bpf_printk("    [R11] This thread may have been created before trace start or was not captured by sched_wakeup");
+			}
+		}
 	}
 	return 0;
 }
@@ -254,12 +475,12 @@ struct apm_trace_info_t *get_apm_trace_info_v2(struct apm_trace_key_t trace_key,
 
 static __inline __attribute__((__always_inline__))
 struct apm_trace_info_t *
-get_apm_trace_info_v3(struct apm_trace_key_t trace_key, __u64 tgid_pid, __u32 tgid, __u32 pid) {
-	struct apm_trace_info_t *trace_info = get_apm_trace_info_v2(trace_key, tgid, pid);
+get_apm_trace_info_v3(struct apm_trace_key_t trace_key, __u64 tgid_pid, __u32 tgid, __u32 tid) {
+	struct apm_trace_info_t *trace_info = get_apm_trace_info_v2(trace_key, tgid, tid);
 	struct goid_trace_key_t key_goid = {.tgid = tgid, .goid = trace_key.goid};
 
 	if (trace_info != NULL) {
-		trace_info->type = 1;
+		trace_info->type = APM_TRACE_INFO_TYPE_FIRST_FOUND;
 //		    trace_info = get_apm_trace_info_v2(id, pid, tid);
 //		trace_id = trace_info->trace_id;
 		trace_info->goid_trace_key = key_goid;
@@ -288,7 +509,7 @@ get_apm_trace_info_v3(struct apm_trace_key_t trace_key, __u64 tgid_pid, __u32 tg
 				// 将key保存在trace_info用于后续清除
 				trace_info->goid_trace_key = key_goid;
 				// Second or multiple times
-				trace_info->type = 2;
+				trace_info->type = APM_TRACE_INFO_TYPE_REUSE_FOUND;
 			}
 		}
 	}
@@ -299,8 +520,23 @@ get_apm_trace_info_v3(struct apm_trace_key_t trace_key, __u64 tgid_pid, __u32 tg
 		trace_info = bpf_map_lookup_elem(&thread_trace_info_heap, &t_key);
 		if (trace_info != NULL) {
 //			bpf_printk("ttttttttttttt trace_id:%llu", trace_info->trace_id);
-			trace_info->type = 3;
+			trace_info->type = APM_TRACE_INFO_TYPE_THREAD_FOUND;
+		}
+	}
+
+	if (trace_info == NULL) {
+		struct thread_ctx_t *current_ctx = bpf_map_lookup_elem(&thread_ctx, &tid);
+		if (current_ctx) {
+//			bpf_printk("    [HTTP] has context:  %u->%u token=%llu", tid, current_ctx->parent_tid, current_ctx->token);
+//			bpf_printk("    [HTTP] has context:  %u->%u token=%llu", tid, current_ctx->root_thread, current_ctx->token);
+			trace_info = get_apm_trace_info_by_trace_key(current_ctx->trace_key);
+			if (trace_info != NULL) {
+				trace_info->type = APM_TRACE_INFO_TYPE_CTX_FOUND;
+			}
+		} else {
+			bpf_printk("    [HTTP] tid:%d has NO context", tid);
 		}
+
 	}
 	return trace_info;
 }
@@ -374,7 +610,12 @@ struct apm_span_context *cw_get_parent_tracking_span() {
 	struct apm_span_context *apm_sc = {0};
 	struct apm_span_context *span_contexts = bpf_map_lookup_elem(&apm_parent_span_context_map, &trace_key);
 //	bpf_printk("-------");
-
+	if (!span_contexts) {
+		struct thread_ctx_t *current_ctx = bpf_map_lookup_elem(&thread_ctx, &trace_key.pid);
+		if (current_ctx) {
+			span_contexts = bpf_map_lookup_elem(&apm_parent_span_context_map, &current_ctx->trace_key);
+		}
+	}
 	if (span_contexts) {
 		/*for (int i = 0; i < APM_TYPE_FROM_SIZE; i++) {
 			bpf_printk("type_from[%d] = %02x", i, span_contexts->type_from[i]);
@@ -438,7 +679,8 @@ __u64 clear_current_span_context() {
 }
 
 static __inline __attribute__((__always_inline__))
-__u64 cw_clear_trace(__u32 tgid, __u32 pid, __u32 fd) {
+__u64 cw_clear_trace(__u32 tgid, __u32 tid, __u32 fd) {
+
 	struct apm_trace_key_t trace_key = get_apm_trace_key(120 * NS_PER_SEC, true);
 	struct fd_trace_key_t fd_trace_key = get_fd_trace_key(tgid, fd);
 	// 清除 trace_info_heap
@@ -453,6 +695,14 @@ __u64 cw_clear_trace(__u32 tgid, __u32 pid, __u32 fd) {
 	bpf_map_delete_elem(&trace_info_heap, &trace_key);
 	bpf_map_delete_elem(&fd_trace_info_heap, &fd_trace_key);
 	bpf_map_delete_elem(&apm_parent_span_context_map, &trace_key);
+
+	// 清理 tgid_root_thread:trace end 一定在 root thread 中执行
+	// 直接删除映射即可,因为 trace start 和 trace end 都在同一个线程
+	bpf_map_delete_elem(&tgid_root_thread, &tgid);
+	// 最后删除 thread_ctx
+	bpf_map_delete_elem(&thread_ctx, &tid);
+	bpf_printk("clean trace %d",tid);
+
 	return 0;
 }
 
@@ -460,13 +710,15 @@ __u64 cw_clear_trace(__u32 tgid, __u32 pid, __u32 fd) {
 // Stores sc in apm_current_span_context_map under the key returned by
 // get_apm_trace_key(120 * NS_PER_SEC, true), overwriting any existing
 // entry (BPF_ANY). The result of the map update is not checked.
 static __inline __attribute__((__always_inline__))
 void cw_save_current_tracking_span(struct apm_span_context *sc) {
 	struct apm_trace_key_t trace_key = get_apm_trace_key(120 * NS_PER_SEC, true);
-//	bpf_printk("[save cw sc]%d",trace_key.goid);
+	bpf_printk("    [build header] [save cw sc] tid:(%d)",trace_key.pid);
 	bpf_map_update_elem(&apm_current_span_context_map, &trace_key, sc, BPF_ANY);
 }
 
 
 static __inline __attribute__((__always_inline__))
-struct apm_span_context *cw_get_current_tracking_span(struct apm_trace_info_t *trace_info) {
+struct apm_span_context *
+cw_get_current_tracking_span(struct apm_trace_info_t *trace_info, struct apm_trace_key_t origin_trace_key,
+                             unsigned char assumed_app_id[APM_ASSUMED_APP_ID_SIZE],unsigned char span_id[APM_SPAN_ID_SIZE]) {
 	struct apm_trace_key_t trace_key = {0};
 	if (trace_info){
 		trace_key = trace_info->trace_key;
@@ -482,10 +734,16 @@ struct apm_span_context *cw_get_current_tracking_span(struct apm_trace_info_t *t
 		if (trace_info->routine_reuse == true) {
 //			bpf_printk("[get cw sc bbb] :%llu", trace_info->trace_key.goid);
 			struct apm_trace_key_t ancestor_trace_key = trace_info->trace_key;
+			trace_key = ancestor_trace_key;
 			span_contexts = bpf_map_lookup_elem(&apm_current_span_context_map, &ancestor_trace_key);
 		}
 	}
 
+	if (!span_contexts) {
+		trace_key = origin_trace_key;
+		span_contexts = bpf_map_lookup_elem(&apm_current_span_context_map, &origin_trace_key);
+	}
+
 	if (span_contexts) {
 		/*for (int i = 0; i < APM_TYPE_FROM_SIZE; i++) {
 			bpf_printk("cw_get_current_tracking_span-type_from[%d] = %02x", i, span_contexts->type_from[i]);
@@ -512,6 +770,7 @@ struct apm_span_context *cw_get_current_tracking_span(struct apm_trace_info_t *t
 			bpf_printk("cw_get_current_tracking_span-span_id[%d] = %02x", i, span_contexts->span_id[i]);
 		}*/
 		apm_sc = span_contexts;
+		bpf_map_delete_elem(&apm_current_span_context_map, &trace_key);
 	}
 //	bpf_printk("-------end");
 
@@ -605,7 +864,7 @@ struct apm_trace_info_t cw_save_trace_info(__u64 id, __u32 pid, __u64 fd) {
 	bpf_map_update_elem(&trace_info_heap, &trace_info.trace_key, &trace_info, BPF_NOEXIST);
 	bpf_map_update_elem(&fd_trace_info_heap, &trace_info.fd_trace_key, &trace_info, BPF_NOEXIST);
 	bpf_map_update_elem(&thread_trace_info_heap, &trace_info.thread_trace_key, &trace_info, BPF_NOEXIST);
-
+	set_root_ctx(uid_base, id, trace_info);
 
 //	struct goid_trace_key_t key = {.tgid = id, .goid = get_current_goroutine()};
 //	bpf_map_update_elem(&goid_trace_info_heap, &key, &trace_info, BPF_NOEXIST);

+ 615 - 9
ebpftracer/ebpf/l7/l7.c

@@ -154,11 +154,29 @@ struct {
     __uint(max_entries, 10240);
 } active_reads SEC(".maps");
 
+// Key of the l7_request_fd_heap map (its key_size is sizeof this struct).
+struct l7_request_count_key {
+	__u64 token; // presumably the trace token / trace_id - TODO confirm at the write site
+	__u64 fd;    // file descriptor of the connection
+};
+
+// Value of the l7_request_fd_heap map (its value_size is sizeof this struct).
+// NOTE(review): the counters presumably track request starts and ends
+// seen for one (token, fd) pair; no code updating them is visible here.
+struct l7_request_count {
+	__u32 start_count;
+	__u32 end_count;
+};
+
+// l7_request_fd_heap: LRU hash from l7_request_count_key to
+// l7_request_count. Declared with key_size/value_size rather than
+// __type(), unlike the maps in apm_trace.c.
+struct {
+	__uint(type, BPF_MAP_TYPE_LRU_HASH);
+	__uint(key_size, sizeof(struct l7_request_count_key));
+	__uint(value_size, sizeof(struct l7_request_count));
+	__uint(max_entries, 10240);
+} l7_request_fd_heap SEC(".maps");
+
 // Key identifying one in-flight L7 request (presumably the key type of
 // active_l7_requests - confirm against the map definition).
 // NOTE(review): with read_seq the members occupy 20 bytes, so on common
 // 64-bit ABIs the struct is padded to 24 bytes. Keys built on the stack
 // must be fully zeroed, otherwise the padding bytes can make hash lookups
 // miss - confirm how every key instance is initialised.
 struct l7_request_key {
     __u64 fd;
     __u32 pid;
     __u16 is_tls;
     __s16 stream_id;
+	__u32 read_seq;
 };
 
 struct l7_request {
@@ -275,6 +293,36 @@ static __always_inline void cw_string_to_span_context(char *str, struct apm_span
 static __always_inline void generate_random_bytes(unsigned char *buff, __u32 size);
 static __inline __attribute__((__always_inline__)) void cw_save_parent_tracking_span(struct apm_span_context *sc);
 
+//static __always_inline int set_root_ctx(__u64 token)
+//{
+//	__u64 t = bpf_ktime_get_ns();
+//	__u64 pidtgid = bpf_get_current_pid_tgid();
+//	__u32 tid  = (__u32) pidtgid;
+//	__u32 tgid = (__u32)(pidtgid >> 32);
+//
+//	struct thread_ctx_t r = {};
+//	r.token = token;
+//	r.tgid = tgid;
+//	r.root_thread = tid;
+//	r.ts_ns = t;
+//	r.exp_ns = t + 30ULL * 1000 * 1000; // 30ms
+//	r.parent_tid = 0;
+//	r.is_main_thread = 1;
+//	r.level=1;
+//
+//	// 同一线程串行处理多个请求:用 BPF_ANY 刷新为"当前请求"
+//	bpf_printk("save root tid=%u",tid);
+//	bpf_map_update_elem(&thread_ctx, &tid, &r, BPF_ANY);
+//
+//	struct thread_ctx_t *next_ctx = bpf_map_lookup_elem(&thread_ctx, &tid);
+//	if (next_ctx) {
+//		bpf_printk("get ctx token=%llu",next_ctx->token);
+//		return 0;
+//	}
+//
+//	return 0;
+//}
+
 static inline __attribute__((__always_inline__))
 void send_event(void *ctx, struct l7_event *e, struct connection_id cid, struct connection *conn) {
     e->connection_timestamp = conn->timestamp;
@@ -431,15 +479,21 @@ int trace_enter_write(void *ctx, __u64 fd, __u16 is_tls, char *buf, __u64 size,
 
 	    struct apm_trace_info_t * start_trace_info = get_trace_info_by_fd(pid, fd);
 	    if (!start_trace_info) {
+		    bpf_printk("[Trace] [End] no info");
 		    return -1;
 		}
         __u64 trace_id = start_trace_info->trace_id;
 	    __u32 event_count = cw_get_event_count(trace_id);
 //	    bpf_printk("[Trace End in l7] count(%d) %llu ", event_count, trace_id);
+
+	    bpf_printk("[Trace] [End] tid:[%d] FD:[%d] traceid:[%llu]", tid, fd, trace_id);
+
         cw_bpf_debug("[uprobeThread/pidpidpidpid][Trace End in l7][HTTP]pid:[%d]--[%lld]", tid, bpf_ktime_get_ns());
 	    cw_bpf_debug("[Trace End in l7][Response][HTTP] event_count:%d", event_count);
 	    cw_bpf_debug("[Trace End in l7][Response][HTTP] pid:%d,fd:%d,trace_id:%llu", tid, fd, trace_id);
 
+//	    bpf_printk("[Trace] [End] tid:[%d] FD:[%d] traceid:[%llu]", tid, k.fd, trace_id);
+
         // 发送事件到用户空间 start
         struct l7_event *e = bpf_map_lookup_elem(&l7_event_heap, &zero);
         if (!e) {
@@ -561,7 +615,7 @@ int trace_enter_write(void *ctx, __u64 fd, __u16 is_tls, char *buf, __u64 size,
 
     if (is_http_request(payload)) {
 	    cw_bpf_debug("");
-	    cw_bpf_debug("-----[Kernel HTTP Enter]:pid:[%d]|CURRENT-GOID:[%llu]|FD:[%d]", tid, get_current_goroutine(), k.fd);
+//	    bpf_printk("[HTTP] Start tid:[%d] FD:[%d]", tid, get_current_goroutine(), k.fd);
 	    __u8 type =  0;
 	    __u64 trace_id = 0;
 	    struct apm_trace_key_t trace_key = get_apm_trace_key(120 * NS_PER_SEC, true);
@@ -576,10 +630,25 @@ int trace_enter_write(void *ctx, __u64 fd, __u16 is_tls, char *buf, __u64 size,
 			trace_id = trace_info->trace_id;
 			type = trace_info->type;
 	    }
+	    bpf_printk("   [HTTP] [Start] tid:[%d]  traceid:[%llu] payload:%s", tid, trace_id, payload);
+
+	    // 检查当前线程是否有上下文
+//	    struct thread_ctx_t *current_ctx = bpf_map_lookup_elem(&thread_ctx, &tid);
+//	    if (current_ctx) {
+//		    bpf_printk("    [HTTP] has context:  %u->%u token=%llu", tid, current_ctx->parent_tid, current_ctx->token);
+//		    bpf_printk("    [HTTP] has context:  %u->%u token=%llu", tid, current_ctx->root_thread, current_ctx->token);
+//	    } else {
+//		    bpf_printk("    [HTTP] tid:%d has NO context", tid);
+//	    }
+
 	    req->protocol = PROTOCOL_HTTP;
 		req->trace_id = trace_id;
+//	    bpf_printk("   [HTTP] [Start] tid:[%d] FD:[%d] traceid:[%llu]", tid, fd, trace_id);
+//	    bpf_printk("   [HTTP] [Start] payload:%s",  payload);
+
         // cw_bpf_debug("l7.c111 addr is --------:%d,%s",conn->sport,conn->saddr);
-	    struct  apm_span_context * sc = cw_get_current_tracking_span(trace_info);
+	    struct apm_span_context *sc = cw_get_current_tracking_span(trace_info, trace_key, req->assumed_app_id,
+	                                                               req->span_id);
 	    if (sc) {
 		    cw_copy_byte_arrays(sc->assumed_app_id, req->assumed_app_id, APM_ASSUMED_APP_ID_SIZE);
 		    cw_copy_byte_arrays(sc->span_id, req->span_id, APM_SPAN_ID_SIZE);
@@ -610,8 +679,18 @@ int trace_enter_write(void *ctx, __u64 fd, __u16 is_tls, char *buf, __u64 size,
 //        }
         req->protocol = PROTOCOL_POSTGRES;
     } else if (is_redis_query(payload, size)) {
-        cw_bpf_debug("[Enter][Redis]:TGID:%d|type:%s|FD:%d\n",k.pid,"type",k.fd);
+	    __u32 read_seq = get_tcp_read_seq_from_fd(k.fd);
+	    __u32 write_seq = get_tcp_write_seq_from_fd(k.fd);
+//        bpf_printk("    [Enter][Redis]:s|FD:%d\n",k.pid,"type",k.fd);
+	    bpf_printk("   -----------[Redis] [Start] tid:[%d] fd:[%d]", tid,k.fd);
+	    bpf_printk("   -----------[Redis] [Start] read_seq:%d write_seq:%d size:%d", read_seq, write_seq, size);
+	    __u64 trace_id = get_apm_trace_id(pid, tid);
+	    bpf_printk("   -----------[Redis] [Start] traceid:[%llu]", trace_id);
+
+	    req->trace_id = trace_id;
+
         req->protocol = PROTOCOL_REDIS;
+
     } else if (is_memcached_query(payload, size)) {
 	    cw_bpf_debug("[Enter][MEMCACHE]:TGID:%d|type:%s|FD:%d\n",k.pid,"type",k.fd);
         req->protocol = PROTOCOL_MEMCACHED;
@@ -860,7 +939,7 @@ int trace_exit_read_common(void *ctx, __u64 id, __u32 pid, __u16 is_tls, long in
 //	    fd_trace_key = get_fd_trace_key(pid, k.fd);
 //
 //		// trace info
-	    struct apm_trace_info_t trace_info = cw_save_trace_info(id,pid, k.fd);
+		struct apm_trace_info_t trace_info = cw_save_trace_info(id, pid, k.fd);
 //	    __u64 uid_base = bpf_ktime_get_ns();
 //	    trace_info.trace_id = bpf_get_current_pid_tgid() + uid_base;
 
@@ -868,10 +947,17 @@ int trace_exit_read_common(void *ctx, __u64 id, __u32 pid, __u16 is_tls, long in
         e->trace_type = 0;
         e->trace_id = trace_info.trace_id;
 	    cw_bpf_debug("\n");
-		cw_bpf_debug("[Trace Start in l7][HTTP]pid:[%d]--[%lld]--trace_id:%llu\n", pid, bpf_ktime_get_ns(),trace_info.trace_id);
+		bpf_printk("[Trace] [start] tid:[%d] trace_id:[%llu] fd:%d\n", pid, trace_info.trace_id, k.fd);
 	    cw_bpf_debug("[Trace Start in l7][Receive][HTTP]tid:[%d]|GOID:[%d]|FD:%d\n", tid, trace_info.trace_key.goid,k.fd);
         e->payload_size = ret;
         COPY_PAYLOAD(e->payload, ret, payload);
+		__u32 read_seq = get_tcp_read_seq_from_fd(k.fd);
+		__u32 write_seq = get_tcp_write_seq_from_fd(k.fd);
+//        bpf_printk("    [Enter][Redis]:s|FD:%d\n",k.pid,"type",k.fd);
+		bpf_printk("   -----------[Trace] [Start] read_seq:%d write_seq:%d size:%d", read_seq, write_seq, ret);
+
+		bpf_printk("[Trace] [start] tid:[%d] payload:%s",tid, payload);
+
 
 		// pid_tgid:trace_id
 //		thread_trace_key =
@@ -937,6 +1023,10 @@ int trace_exit_read_common(void *ctx, __u64 id, __u32 pid, __u16 is_tls, long in
     struct l7_request *req = bpf_map_lookup_elem(&active_l7_requests, &k);
     int response = 0;
     if (!req) {
+	    __u32 read_seq = get_tcp_read_seq_from_fd(k.fd);
+	    __u32 write_seq = get_tcp_write_seq_from_fd(k.fd);
+	    bpf_printk("   [Redis] [End] NO REQ: read_seq:%d write_seq:%d size:%d", read_seq, write_seq, ret);
+	    bpf_printk("   [Redis] [End] NO REQ: fd=%d tid=%d", k.fd, tid);
 //	    cw_bpf_debug("no req? 6:[0x%x] k.pid:%d, k.fd:%d",b[4],k.pid,k.fd);
         if (is_dns_response(payload, ret, &k.stream_id, &e->status)) {
 //	        cw_bpf_debug("dns");
@@ -1021,10 +1111,11 @@ int trace_exit_read_common(void *ctx, __u64 id, __u32 pid, __u16 is_tls, long in
 //        // 请求报文
 //	    bpf_printk("[Response][HTTP222] req-payload:%s",e->payload);
 //        // 响应报文
-//        cw_bpf_debug("[Response][HTTP222] resp-payload:%s",payload);
+        cw_bpf_debug("[Response][HTTP222] resp-payload:%s",payload);
 
 		response = is_http_response(payload, &e->status);
 //	    cw_bpf_debug("[Kernel End][HTTP]:pid:[%d]|CURRENT-GOID:[%llu]|trace_id:[%llu]---------\n", tid, get_current_goroutine(),e->trace_id);
+		bpf_printk("   [HTTP] [End] tid:[%d] FD:[%d] traceid:[%llu]", tid, k.fd, e->trace_id);
 
 	} else if (e->protocol == PROTOCOL_POSTGRES) {
 //        __u64 trace_id = get_apm_trace_id(pid, tid);
@@ -1039,15 +1130,23 @@ int trace_exit_read_common(void *ctx, __u64 id, __u32 pid, __u16 is_tls, long in
 			e->method = METHOD_STATEMENT_PREPARE;
 		}
 	} else if (e->protocol == PROTOCOL_REDIS) {
+		e->trace_id = req->trace_id;
 		cw_bpf_debug("[Response][Redis]:TGID:%d|type:%s|FD:%d\n", k.pid, "", k.fd);
 //		__u64 trace_id = get_apm_trace_id(pid, tid);
 //		e->trace_id = req->trace_id;
-		cw_bpf_debug("[Redis] trace_id:%llu", req->trace_id);
+//		bpf_printk("    [Redis] trace_id:%llu", req->trace_id);
+		bpf_printk("   [Redis] [End] tid:[%d] fd:[%d] traceid:[%llu] ", tid, k.fd, req->trace_id);
+		__u32 read_seq = get_tcp_read_seq_from_fd(k.fd);
+		__u32 write_seq = get_tcp_write_seq_from_fd(k.fd);
+		bpf_printk("   [Redis] [End] read_seq:%d write_seq:%d size:%d", read_seq, write_seq, ret);
         e->component_sport = conn->sport;
         e->component_dport = conn->dport;
         __builtin_memcpy(&e->component_saddr, &conn->saddr, sizeof(e->component_saddr));
         __builtin_memcpy(&e->component_daddr, &conn->daddr, sizeof(e->component_daddr));
 		response = is_redis_response(payload, ret, &e->status, e->error_message);
+		if (!response) {
+			bpf_printk("   [Redis] [End] NOT VALID: fd=%d tid=%d", k.fd, tid);
+		}
 	} else if (e->protocol == PROTOCOL_MEMCACHED) {
 		cw_bpf_debug("[Response][MEMCACHE]:thread_id:%d\n", tid);
 		e->component_sport = conn->sport;
@@ -1265,10 +1364,12 @@ int trace_exit_read_common(void *ctx, __u64 id, __u32 pid, __u16 is_tls, long in
 	}
 
     if (!response) {
+		bpf_printk("   [Redis] [End] RETURN EARLY: fd=%d tid=%d", k.fd, tid);
         return 0;
     }
 	if (e->trace_id == 0){
 		e->trace_id = get_apm_trace_id(pid,tid);
+		bpf_printk(" ttttttttttttttttttid=0 %llu",e->trace_id);
 	}
 	e->end_at = bpf_ktime_get_ns();
 	e->start_at = req->ns;
@@ -1431,7 +1532,512 @@ PROGTP(l7_http_trace_id)(void * ctx){
     }
     // 保存 trace_id 到psc
     cw_save_parent_tracking_span(cw_parent_span_context);
-    cw_bpf_debug("[Trace Start in l7][HTTP] trace_id:[%llu]\n", cw_parent_span_context->trace_id);
+	struct apm_trace_key_t trace_key = get_apm_trace_key(120 * NS_PER_SEC, true);
+//    bpf_printk("[Trace save header] pid:[%d]\n", trace_key.pid);
     // ---------- 在http请求入口生成 横向串联的trace_id end ----------
     return 0;
-}
+}
+
+//
+//SEC("tracepoint/sched/sched_wakeup")
+//int handle_sched_wakeup(void *ctx)
+//{
+//	__u64 waker_pid_tgid = bpf_get_current_pid_tgid();
+//	__u32 waker_tid = (__u32)waker_pid_tgid;  // 线程 ID
+//	__u32 tgid      = (__u32)(waker_pid_tgid >> 32); // 进程 ID
+//	__u64 now       = bpf_ktime_get_ns();
+//
+//	if (load_filter_pid() != 0 && tgid != load_filter_pid())
+//		return 0;
+//
+//	struct tp_sched_wakeup args = {};
+//	// 4.18 没有 bpf_probe_read_kernel,用 bpf_probe_read
+//	if (bpf_probe_read(&args, sizeof(args), ctx) != 0)
+//		return 0;
+//
+//	__u32 wakee_tid = args.pid;   // 被唤醒的tid
+//
+//	bpf_printk("[sched_wakeup] %u -> %u", waker_tid, wakee_tid);
+//
+//	// 特别关注线程 96415
+//	if (wakee_tid == 96415) {
+//		bpf_printk("[sched_wakeup] *** WAKEUP 96415 by %u ***", waker_tid);
+//	}
+//
+//	// 查找 waker 的上下文
+//	struct thread_ctx_t *t = bpf_map_lookup_elem(&thread_ctx, &waker_tid);
+//
+//	if (t && t->exp_ns > now) {
+//		// waker 有有效上下文,传递给 wakee
+//		struct thread_ctx_t copy = {};
+//		copy.ts_ns = now;
+//		copy.exp_ns = now + 3600ULL * 1000 * 1000; // 1小时
+//		copy.tgid = tgid;
+//		copy.token = t->token; // 继承 token
+//		copy.parent_tid = waker_tid; // waker 成为父线程
+//
+//		bpf_printk("[sched_wakeup] inherit: %u -> %u token=%llu (from waker)", waker_tid, wakee_tid, t->token);
+//
+//		// 保存到 wakee
+//		bpf_map_update_elem(&thread_ctx, &wakee_tid, &copy, BPF_ANY);
+//	} else {
+//		// waker 没有有效上下文,为 wakee 创建新上下文
+//		struct thread_ctx_t copy = {};
+//		copy.ts_ns = now;
+//		copy.exp_ns = now + 3600ULL * 1000 * 1000; // 1小时
+//		copy.tgid = tgid;
+//		copy.token = waker_pid_tgid + now; // 生成新的token
+//		copy.parent_tid = waker_tid; // waker 成为父线程
+//
+//		bpf_printk("[sched_wakeup] create: %u -> %u token=%llu (new token)", waker_tid, wakee_tid, copy.token);
+//		if (t) {
+//			bpf_printk("[sched_wakeup] waker %u context expired: exp_ns=%llu now=%llu", waker_tid, t->exp_ns, now);
+//		} else {
+//			bpf_printk("[sched_wakeup] waker %u has no context", waker_tid);
+//		}
+//
+//		// 保存到 wakee
+//		bpf_map_update_elem(&thread_ctx, &wakee_tid, &copy, BPF_ANY);
+//	}
+//
+//	return 0;
+//}
+
+
+
+// ---------- 小工具 ----------
+/*
+ * Tell whether a thread context can still be used at time `now`.
+ *
+ * @param c    context to check; may be NULL
+ * @param now  current timestamp, compared against c->exp_ns
+ * @return true only when `c` is non-NULL and its exp_ns lies strictly after `now`
+ */
+static __always_inline bool ctx_valid(const struct thread_ctx_t *c, __u64 now)
+{
+	// De Morgan form of "c && c->exp_ns > now"; the NULL test still runs first.
+	return !(!c || c->exp_ns <= now);
+}
+
+/*
+ * Fill `dst` with a context inherited from `src`.
+ *
+ * The token is copied from `src`; the time window is restarted at `now` and
+ * ends ms_to_ns(TTL_MS) later. Only token, ts_ns, exp_ns, tgid and parent_tid
+ * are written - every other field of `dst` is left as the caller provided it.
+ *
+ * @param dst         context to populate (must not be NULL)
+ * @param src         parent context supplying the token (must not be NULL)
+ * @param parent_tid  thread id recorded as the parent of `dst`
+ * @param tgid        process id recorded in `dst`
+ * @param now         timestamp used as the start of the new time window
+ */
+static __always_inline void ctx_from_parent(struct thread_ctx_t *dst,
+                                            const struct thread_ctx_t *src,
+                                            __u32 parent_tid,
+                                            __u32 tgid,
+                                            __u64 now)
+{
+	// Identity of the new context.
+	dst->tgid       = tgid;
+	dst->parent_tid = parent_tid;
+
+	// Inherited trace token.
+	dst->token      = src->token;
+
+	// Fresh validity window starting now.
+	dst->ts_ns      = now;
+	dst->exp_ns     = now + ms_to_ns(TTL_MS);
+}
+
+
+// ---------- sched_wakeup:只“建边” ----------
+/*
+SEC("tracepoint/sched/sched_wakeup")
+int on_sched_wakeup(void *ctx)
+{
+	__u64 tnow = bpf_ktime_get_ns();
+	__u64 pidtgid = bpf_get_current_pid_tgid();
+	__u32 waker_tid = (__u32)pidtgid;
+	__u32 waker_tgid = (__u32)(pidtgid >> 32);
+	__u64 trace_id = 0;
+	if (load_filter_pid() != 0 && waker_tgid != load_filter_pid())
+		return 0;
+
+	// 直接从 tracepoint 上下文中读取 pid 字段 (offset:24)
+	__u32 wakee_tid = 0;
+	if (bpf_probe_read(&wakee_tid, sizeof(wakee_tid), (void *)(ctx + 24)))
+		return 0;
+
+	bpf_printk("\n [sched_wakeup] [Start] waker=%u wakee=%u",
+	           waker_tid, wakee_tid);// [sched_wakeup] [Start] waker=117427 wakee=117425
+
+	struct thread_ctx_t *wakee_ctx = bpf_map_lookup_elem(&thread_ctx, &wakee_tid); // 异步被唤醒线程
+	if (wakee_ctx) {
+		if (wakee_ctx->is_main_thread == 1) {
+			__u16 level =  wakee_ctx->level;
+			trace_id = wakee_ctx->token;
+			struct thread_ctx_t new_ctx = {};
+			new_ctx.token = trace_id;
+			new_ctx.ts_ns = tnow;
+			new_ctx.exp_ns = tnow + 30ULL * 1000 * 1000;
+			new_ctx.tgid = waker_tgid;
+			new_ctx.parent_tid = wakee_tid;
+			new_ctx.root_thread = wakee_ctx->root_thread;
+			new_ctx.level = level + 1;
+			new_ctx.trace_key = wakee_ctx->trace_key;
+
+			// key是否是主线程
+			struct thread_ctx_t *waker_ctx = bpf_map_lookup_elem(&thread_ctx, &waker_tid);
+			if (waker_ctx && waker_ctx->is_main_thread == 1) {
+				new_ctx.is_main_thread = waker_ctx->is_main_thread;
+			}
+			bpf_map_update_elem(&thread_ctx, &waker_tid, &new_ctx, BPF_ANY);
+			bpf_printk("    [sched_wakeup] [save] [wakee_ctx] key(%u)->val(%u) main(%d)", waker_tid, wakee_tid,
+			           wakee_ctx->is_main_thread);
+		} else{
+			bpf_printk("    [sched_wakeup] [not save] [wakee_ctx] wakee_tid(%d) is_main(%d)", wakee_tid,
+			           wakee_ctx->is_main_thread);// [sched_wakeup] [not save] [wakee_ctx] wakee_tid(117425) is_main(0)
+		}
+	} else {
+		bpf_printk("    [sched_wakeup] [not find prev_ctx] wakee_tid(%u)",wakee_tid);
+//		struct thread_ctx_t *waker_ctx = bpf_map_lookup_elem(&thread_ctx, &waker_tid); // 主线程唤醒
+	}
+
+	struct thread_ctx_t *waker_ctx = bpf_map_lookup_elem(&thread_ctx, &waker_tid); // 异步被唤醒线程
+	if (waker_ctx) {
+		if (waker_ctx->is_main_thread == 1) {
+			__u16 level =  waker_ctx->level;
+			trace_id = waker_ctx->token;
+			struct thread_ctx_t new_ctx = {};
+			new_ctx.token = trace_id;
+			new_ctx.ts_ns = tnow;
+			new_ctx.exp_ns = tnow + 300ULL * 1000 * 1000;
+			new_ctx.tgid = waker_tgid;
+			new_ctx.parent_tid = waker_tid;
+			new_ctx.root_thread = waker_ctx->root_thread;
+			new_ctx.level = level + 1;
+			new_ctx.trace_key = waker_ctx->trace_key;
+			// key是否是主线程
+//			struct thread_ctx_t *wakee_ctx = bpf_map_lookup_elem(&thread_ctx, &wakee_tid);
+			if (wakee_ctx && wakee_ctx->is_main_thread == 1) {
+				new_ctx.is_main_thread = wakee_ctx->is_main_thread;
+			}
+			bpf_map_update_elem(&thread_ctx, &wakee_tid, &new_ctx, BPF_ANY);
+			bpf_printk("    [sched_wakeup] [save] [waker_ctx] key(%u)->val(%u) main(%d)", wakee_tid, waker_tid,
+			           waker_ctx->is_main_thread);
+		} else{
+			bpf_printk("    [sched_wakeup] [not save] [waker_ctx] waker_tid(%d) is_main(%d)", waker_tid,
+			           waker_ctx->is_main_thread);
+		}
+	} else {
+		bpf_printk("    [sched_wakeup] [waker_ctx] [not find prev_ctx] tid:%u",waker_tid);
+	}
+
+// [sched_wakeup] [End] new edge: waker=117427  -> wakee=117425 trace_id=0
+	bpf_printk("[sched_wakeup] [End] new edge: waker=%u  -> wakee=%u trace_id=%llu",
+	           waker_tid, wakee_tid, trace_id);
+
+	return 0;
+//
+//	struct thread_ctx_t *pt = bpf_map_lookup_elem(&thread_ctx, &waker_tid);
+//	if (!pt || pt->exp_ns <= tnow) {
+//		bpf_printk("[sched_wakeup] skip: waker tid=%u no valid ctx (tgid=%u)",
+//		           waker_tid, waker_tgid);
+//		return 0;
+//	}
+//
+//	// 查找旧边
+//	struct edge_t *old = bpf_map_lookup_elem(&wake_edge, &wakee_tid);
+//	if (old && old->exp_ns > tnow) {
+//		bpf_printk("[sched_wakeup] skip: wakee tid=%u already has edge (parent=%u)",
+//		           wakee_tid, old->parent_tid);
+//		return 0;
+//	}
+//
+//	if (old && old->exp_ns <= tnow)
+//		bpf_map_delete_elem(&wake_edge, &wakee_tid);
+//
+//	// 新建边
+//	struct edge_t ne = {};
+//	ne.token = pt->token;
+//	ne.parent_tid = waker_tid;
+//	ne.ts_ns = tnow;
+//	ne.exp_ns = tnow + 3600ULL * 1000 * 1000; // 1小时
+//
+//	bpf_map_update_elem(&wake_edge, &wakee_tid, &ne, BPF_ANY);
+//	bpf_printk("[sched_wakeup] new edge: waker=%u (tgid=%u) -> wakee=%u",
+//	           waker_tid, waker_tgid, wakee_tid);
+//
+//	bpf_printk("[sched_wakeup] token=%llu",
+//	          ne.token);
+
+
+	return 0;
+}
+
+ */
+
+// ---------- sched_switch: pass a main thread's context on to the incoming thread ----------
+/*
+ * Tracepoint handler for sched/sched_switch.
+ *
+ * If the outgoing thread (prev) has a thread_ctx entry flagged is_main_thread,
+ * a derived entry is stored under the incoming thread id (next): it carries the
+ * same token, root_thread and trace_key, level + 1, parent_tid = prev and an
+ * expiry 30 ms after the current timestamp. The derived entry is zero-initialised,
+ * so its is_main_thread flag stays 0. Any existing entry for next is overwritten.
+ *
+ * Always returns 0.
+ */
+SEC("tracepoint/sched/sched_switch")
+int on_sched_switch(void *ctx) {
+	__u64 tnow = bpf_ktime_get_ns();
+	__u32 prev_tid = 0;
+	__u32 next_tid = 0;
+	__u64 trace_id = 0;
+
+	// Read the fields straight from the tracepoint record:
+	// prev_pid sits at offset 24, next_pid at offset 56.
+	if (bpf_probe_read(&prev_tid, sizeof(prev_tid), (void *)(ctx + 24)))
+		return 0;
+	if (bpf_probe_read(&next_tid, sizeof(next_tid), (void *)(ctx + 56)))
+		return 0;
+
+	// NOTE(review): the filter uses the task that is current when the tracepoint
+	// fires - presumably the outgoing (prev) task; confirm that is the intended
+	// side to filter on, since the same tgid is also stored for next below.
+	__u32 tgid = (__u32) (bpf_get_current_pid_tgid() >> 32);
+	if (load_filter_pid() != 0 && tgid != load_filter_pid())
+		return 0;
+	if (next_tid == 0)
+		return 0;
+
+	struct thread_ctx_t *prev_ctx = bpf_map_lookup_elem(&thread_ctx, &prev_tid);
+	if (prev_ctx && prev_ctx->is_main_thread == 1) {
+		trace_id = prev_ctx->token;
+
+		struct thread_ctx_t new_ctx = {};
+		new_ctx.token = prev_ctx->token;
+		new_ctx.ts_ns = tnow;
+		new_ctx.exp_ns = tnow + 30ULL * 1000 * 1000; // 30 ms, expressed in ns
+		new_ctx.tgid = tgid;
+		new_ctx.parent_tid = prev_tid;
+		new_ctx.root_thread = prev_ctx->root_thread;
+		new_ctx.level = prev_ctx->level + 1;
+		new_ctx.trace_key = prev_ctx->trace_key;
+		bpf_map_update_elem(&thread_ctx, &next_tid, &new_ctx, BPF_ANY);
+		bpf_printk("    [sched_switch] [save] %u->%u level(%d)", next_tid, prev_tid, new_ctx.level);
+	}
+
+	// trace_id stays 0 when prev had no main-thread context.
+	bpf_printk("[sched_switch] [End] prev_tid=%u -> next_tid=%u trace=%llu",
+	           prev_tid, next_tid,trace_id);
+	return 0;
+}
+
+/*
+ * Pack a process id and a file descriptor into one 64-bit key:
+ * tgid occupies the upper 32 bits, fd the lower 32 bits.
+ */
+static __always_inline __u64 key64(__u32 tgid, __u32 fd)
+{
+	__u64 key = tgid;
+
+	key <<= 32;
+	key |= fd;
+	return key;
+}
+
+
+#define EPOLL_CTL_ADD 1
+
+/*
+ * Tracepoint handler for syscalls/sys_enter_epoll_ctl.
+ *
+ * Logs the epoll fd (args[0]), the target fd (args[2]) and the calling thread
+ * for every epoll_ctl call made by the filtered process. The operation code
+ * (args[1]) is not inspected, so calls are logged regardless of whether they
+ * are EPOLL_CTL_ADD. Recording the owner in an eventfd2owner map keyed by
+ * key64(tgid, fd) is currently disabled; this handler only traces.
+ *
+ * Always returns 0.
+ */
+SEC("tracepoint/syscalls/sys_enter_epoll_ctl")
+int on_epoll_ctl(struct trace_event_raw_sys_enter__stub *ctx) {
+	__u64 pid_tgid = bpf_get_current_pid_tgid();
+	__u32 tgid = pid_tgid >> 32;
+	__u32 tid = (__u32)pid_tgid;
+
+	if (load_filter_pid() != 0 && tgid != load_filter_pid())
+		return 0;
+
+	int epfd = ctx->args[0];
+	int fd   = ctx->args[2];
+
+	// epfd is an int, so it is printed with %d like fd (it was %llu before,
+	// which does not match the argument type).
+	bpf_printk("[eventfd2owner] epfd=%d fd=%d owner_tid=%u", epfd, fd, tid);
+	return 0;
+}
+
+/*
+ * Tracepoint handler for syscalls/sys_enter_epoll_pwait.
+ *
+ * Logs the process id, thread id and epoll fd (args[0]) each time the filtered
+ * process enters epoll_pwait. The remaining syscall arguments are not used.
+ *
+ * Always returns 0.
+ */
+SEC("tracepoint/syscalls/sys_enter_epoll_pwait")
+int trace_epoll_wait(struct trace_event_raw_sys_enter__stub *ctx)
+{
+	__u64 pidtgid = bpf_get_current_pid_tgid();
+	__u32 tgid = pidtgid >> 32;
+	__u32 tid  = pidtgid;
+
+	if (load_filter_pid() != 0 && tgid != load_filter_pid())
+		return 0;
+
+	int epfd = ctx->args[0];
+
+	bpf_printk("[epoll_wait-enter] tgid=%u tid=%u epfd=%d",tgid, tid, epfd);
+	return 0;
+}
+
+/*
+ * Tracepoint handler for syscalls/sys_exit_epoll_pwait.
+ *
+ * For the filtered process, logs the thread id and the syscall return value
+ * whenever that value is positive; zero and negative results are not logged.
+ *
+ * Always returns 0.
+ */
+SEC("tracepoint/syscalls/sys_exit_epoll_pwait")
+int trace_exit_epoll_pwait(struct trace_event_raw_sys_exit__stub *ctx)
+{
+	__u64 id = bpf_get_current_pid_tgid();
+	__u32 tgid = (__u32)(id >> 32);
+	__u32 tid  = (__u32)id;
+	int nready = ctx->ret;
+
+	if (load_filter_pid() != 0 && tgid != load_filter_pid())
+		return 0;
+
+	// Nothing to report unless the call returned a positive count.
+	if (nready <= 0)
+		return 0;
+
+	bpf_printk("[epoll_wait_exit] tid=%u wake ret=%d", tid, nready);
+	return 0;
+}
+
+
+//
+////// 3) lettuce 线程领取 trace(在 epoll_wait 返回后)
+//SEC("tracepoint/syscalls/sys_enter_epoll_wait")
+//int on_epoll_wait_exit(struct trace_event_raw_sys_enter__stub *ctx) {
+//	__u32 tgid = bpf_get_current_pid_tgid() >> 32;
+//	int epfd = ctx->args[0];
+//	int op   = ctx->args[1];
+//	int fd   = ctx->args[2];
+////	if (op != EPOLL_CTL_ADD) return 0;
+//
+//	// 这里简单记录,若要更严谨,可先判定 fd 是 eventfd(如在 write(eventfd) 时回填)
+////	__u64 k = key64(tgid, fd);
+//	__u32 tid = (__u32)bpf_get_current_pid_tgid();
+////	bpf_map_update_elem(&eventfd2owner, &k, &tid, BPF_ANY);
+//
+//	bpf_printk("[sys_enter_epoll_wait] epfd=%llu fd=%d owner_tid=%u", epfd, fd, tid);
+//	return 0;
+//}
+
+//	__u32 tid = (__u32)bpf_get_current_pid_tgid();
+//	__u64 *t = bpf_map_lookup_elem(&pending_trace, &tid);
+//	if (!t) return 0;
+//
+//	struct thread_ctx_t ctx_new = {};
+//	ctx_new.token = *t;
+//	ctx_new.ts_ns = bpf_ktime_get_ns();
+//	bpf_map_update_elem(&thread_ctx, &tid, &ctx_new, BPF_ANY);
+//	bpf_map_delete_elem(&pending_trace, &tid);
+//
+//	bpf_printk("[pickup] tid=%u trace=%llu", tid, ctx_new.trace_id);
+//	return 0;
+//}
+
+// Converts a literal millisecond count to nanoseconds. The ULL suffix is pasted
+// onto the argument, so it only works with a plain integer literal.
+#define MS_TO_NSS(x)  ((x##ULL) * 1000 * 1000)
+
+// Context lifetimes passed to clone_ctx_and_update(), which adds them directly
+// to a bpf_ktime_get_ns() timestamp. Despite the _MS suffix the values therefore
+// have to be expressed in nanoseconds, and they are parenthesised so they expand
+// safely inside larger expressions.
+#define EXP_WAKEE_MS (30ULL * 1000 * 1000)   // 30 ms
+#define EXP_WAKER_MS (300ULL * 1000 * 1000)  // 300 ms
+
+/*
+ * Derive a thread_ctx entry for `dst_tid` from a main thread's context.
+ *
+ * Nothing is stored when `src` is NULL or when `src` is not flagged as a main
+ * thread (the latter case is logged). Otherwise a zero-initialised entry is
+ * built that inherits token, root_thread and trace_key from `src`, sits one
+ * level below it, and is written to the thread_ctx map under `dst_tid`,
+ * replacing whatever was stored there.
+ *
+ * @param dst_tid     map key (thread id) that receives the derived context
+ * @param src         context to inherit from; may be NULL
+ * @param dst_ctx     context currently stored for dst_tid, or NULL; the new
+ *                    entry is flagged is_main_thread only if this one was
+ * @param parent_tid  thread id recorded as parent (also used in the log lines)
+ * @param tgid        process id recorded in the new entry
+ * @param now_ns      timestamp stored as ts_ns and used as the expiry base
+ * @param exp_ms      expiry offset; it is added to now_ns without any unit
+ *                    conversion, so despite the name it is applied as nanoseconds
+ */
+static __always_inline void clone_ctx_and_update(__u32 dst_tid,
+                                                 const struct thread_ctx_t *src,
+                                                 const struct thread_ctx_t *dst_ctx,
+                                                 __u32 parent_tid,
+                                                 __u32 tgid,
+                                                 __u64 now_ns,
+                                                 __u64 exp_ms)
+{
+	if (!src)
+		return;
+
+	if (src->is_main_thread == 1) {
+		struct thread_ctx_t derived = {};
+
+		// Inherited from the source context.
+		derived.token       = src->token;
+		derived.root_thread = src->root_thread;
+		derived.trace_key   = src->trace_key;
+		derived.level       = src->level + 1;
+
+		// Specific to the new entry.
+		derived.tgid        = tgid;
+		derived.parent_tid  = parent_tid;
+		derived.ts_ns       = now_ns;
+		derived.exp_ns      = now_ns + exp_ms;
+
+		// Keep the main-thread flag only if the target already had it;
+		// otherwise it stays at the zero-initialised default.
+		if (dst_ctx && dst_ctx->is_main_thread == 1)
+			derived.is_main_thread = 1;
+
+		bpf_map_update_elem(&thread_ctx, &dst_tid, &derived, BPF_ANY);
+		bpf_printk("    [sched_wakeup] [save] key(%u)->val(%u) new-main(%d)",
+		           dst_tid, parent_tid, derived.is_main_thread);
+		return;
+	}
+
+	bpf_printk("    [sched_wakeup] [not save] src_tid(%u) is_main(%d)",
+	           parent_tid, src->is_main_thread);
+}
+
+/*
+ * Tracepoint handler for sched/sched_wakeup.
+ *
+ * Looks up the thread_ctx entries of the waking thread (waker, the caller of
+ * this program) and of the thread being woken (wakee, read from the tracepoint
+ * record), then tries to propagate a main thread's context in both directions
+ * through clone_ctx_and_update():
+ *   1. wakee -> waker, with EXP_WAKEE_MS as the expiry offset;
+ *   2. waker -> wakee, with EXP_WAKER_MS as the expiry offset.
+ * Each call is a no-op unless its source context exists and is flagged as a
+ * main thread. When both contexts exist and both are main threads, nothing is
+ * updated at all.
+ *
+ * Always returns 0.
+ */
+SEC("tracepoint/sched/sched_wakeup")
+int tp_sched_wakeup(void *ctx)
+{
+	__u64 now        = bpf_ktime_get_ns();
+	__u64 id         = bpf_get_current_pid_tgid();
+	__u32 waker_tid  = (__u32)id;
+	__u32 waker_tgid = (__u32)(id >> 32);
+
+	if (load_filter_pid() != 0 && waker_tgid != load_filter_pid())
+		return 0;
+
+	// The woken thread's id is read from offset 24 of the tracepoint record.
+	__u32 wakee_tid = 0;
+	if (bpf_probe_read(&wakee_tid, sizeof(wakee_tid), (void *)(ctx + 24)))
+		return 0;
+
+	bpf_printk("\n");
+	bpf_printk("[sched_wakeup] [Start] waker=%u wakee=%u", waker_tid, wakee_tid);
+
+	// Both lookups happen before any update so each direction sees the
+	// pointers obtained here.
+	struct thread_ctx_t *wakee_ctx = bpf_map_lookup_elem(&thread_ctx, &wakee_tid);
+	struct thread_ctx_t *waker_ctx = bpf_map_lookup_elem(&thread_ctx, &waker_tid);
+
+	// Two main threads waking each other: leave both contexts untouched.
+	if (wakee_ctx && waker_ctx) {
+		if (wakee_ctx->is_main_thread == 1 && waker_ctx->is_main_thread == 1) {
+			bpf_printk("    [sched_wakeup] [skip update] both main threads (waker=%u wakee=%u)",
+			           waker_tid, wakee_tid);
+			return 0;
+		}
+	}
+
+	// Direction 1: wakee's context is the source, waker is the target.
+	// The target's existing context decides whether the main flag is kept.
+	clone_ctx_and_update(waker_tid, wakee_ctx, waker_ctx,
+	                     wakee_tid, waker_tgid, now, EXP_WAKEE_MS);
+
+	// Direction 2: waker's context is the source, wakee is the target.
+	clone_ctx_and_update(wakee_tid, waker_ctx, wakee_ctx,
+	                     waker_tid, waker_tgid, now, EXP_WAKER_MS);
+
+	// Pick the token to report: wakee's if it is a main thread, else waker's.
+	__u64 trace_id = 0;
+	if (wakee_ctx && wakee_ctx->is_main_thread == 1) {
+		trace_id = wakee_ctx->token;
+	} else if (waker_ctx && waker_ctx->is_main_thread == 1) {
+		trace_id = waker_ctx->token;
+	}
+
+	if (!wakee_ctx)
+		bpf_printk("    [sched_wakeup] [not find prev_ctx] wakee_tid(%u)", wakee_tid);
+	if (!waker_ctx)
+		bpf_printk("    [sched_wakeup] [waker_ctx] [not find prev_ctx] tid:%u", waker_tid);
+
+	bpf_printk("[sched_wakeup] [End] new edge: waker=%u -> wakee=%u trace_id=%llu",
+	           waker_tid, wakee_tid, trace_id);
+	return 0;
+}