|
|
@@ -154,11 +154,29 @@ struct {
|
|
|
__uint(max_entries, 10240);
|
|
|
} active_reads SEC(".maps");
|
|
|
|
|
|
+struct l7_request_count_key {
|
|
|
+ __u64 token;
|
|
|
+ __u64 fd;
|
|
|
+};
|
|
|
+
|
|
|
+struct l7_request_count {
|
|
|
+ __u32 start_count;
|
|
|
+ __u32 end_count;
|
|
|
+};
|
|
|
+
|
|
|
+struct {
|
|
|
+ __uint(type, BPF_MAP_TYPE_LRU_HASH);
|
|
|
+ __uint(key_size, sizeof(struct l7_request_count_key));
|
|
|
+ __uint(value_size, sizeof(struct l7_request_count));
|
|
|
+ __uint(max_entries, 10240);
|
|
|
+} l7_request_fd_heap SEC(".maps");
|
|
|
+
|
|
|
struct l7_request_key {
|
|
|
__u64 fd;
|
|
|
__u32 pid;
|
|
|
__u16 is_tls;
|
|
|
__s16 stream_id;
|
|
|
+ __u32 read_seq;
|
|
|
};
|
|
|
|
|
|
struct l7_request {
|
|
|
@@ -275,6 +293,36 @@ static __always_inline void cw_string_to_span_context(char *str, struct apm_span
|
|
|
static __always_inline void generate_random_bytes(unsigned char *buff, __u32 size);
|
|
|
static __inline __attribute__((__always_inline__)) void cw_save_parent_tracking_span(struct apm_span_context *sc);
|
|
|
|
|
|
+//static __always_inline int set_root_ctx(__u64 token)
|
|
|
+//{
|
|
|
+// __u64 t = bpf_ktime_get_ns();
|
|
|
+// __u64 pidtgid = bpf_get_current_pid_tgid();
|
|
|
+// __u32 tid = (__u32) pidtgid;
|
|
|
+// __u32 tgid = (__u32)(pidtgid >> 32);
|
|
|
+//
|
|
|
+// struct thread_ctx_t r = {};
|
|
|
+// r.token = token;
|
|
|
+// r.tgid = tgid;
|
|
|
+// r.root_thread = tid;
|
|
|
+// r.ts_ns = t;
|
|
|
+// r.exp_ns = t + 30ULL * 1000 * 1000; // 30ms
|
|
|
+// r.parent_tid = 0;
|
|
|
+// r.is_main_thread = 1;
|
|
|
+// r.level=1;
|
|
|
+//
|
|
|
+// // 同一线程串行处理多个请求:用 BPF_ANY 刷新为"当前请求"
|
|
|
+// bpf_printk("save root tid=%u",tid);
|
|
|
+// bpf_map_update_elem(&thread_ctx, &tid, &r, BPF_ANY);
|
|
|
+//
|
|
|
+// struct thread_ctx_t *next_ctx = bpf_map_lookup_elem(&thread_ctx, &tid);
|
|
|
+// if (next_ctx) {
|
|
|
+// bpf_printk("get ctx token=%llu",next_ctx->token);
|
|
|
+// return 0;
|
|
|
+// }
|
|
|
+//
|
|
|
+// return 0;
|
|
|
+//}
|
|
|
+
|
|
|
static inline __attribute__((__always_inline__))
|
|
|
void send_event(void *ctx, struct l7_event *e, struct connection_id cid, struct connection *conn) {
|
|
|
e->connection_timestamp = conn->timestamp;
|
|
|
@@ -431,15 +479,21 @@ int trace_enter_write(void *ctx, __u64 fd, __u16 is_tls, char *buf, __u64 size,
|
|
|
|
|
|
struct apm_trace_info_t * start_trace_info = get_trace_info_by_fd(pid, fd);
|
|
|
if (!start_trace_info) {
|
|
|
+ bpf_printk("[Trace] [End] no info");
|
|
|
return -1;
|
|
|
}
|
|
|
__u64 trace_id = start_trace_info->trace_id;
|
|
|
__u32 event_count = cw_get_event_count(trace_id);
|
|
|
// bpf_printk("[Trace End in l7] count(%d) %llu ", event_count, trace_id);
|
|
|
+
|
|
|
+ bpf_printk("[Trace] [End] tid:[%d] FD:[%d] traceid:[%llu]", tid, fd, trace_id);
|
|
|
+
|
|
|
cw_bpf_debug("[uprobeThread/pidpidpidpid][Trace End in l7][HTTP]pid:[%d]--[%lld]", tid, bpf_ktime_get_ns());
|
|
|
cw_bpf_debug("[Trace End in l7][Response][HTTP] event_count:%d", event_count);
|
|
|
cw_bpf_debug("[Trace End in l7][Response][HTTP] pid:%d,fd:%d,trace_id:%llu", tid, fd, trace_id);
|
|
|
|
|
|
+// bpf_printk("[Trace] [End] tid:[%d] FD:[%d] traceid:[%llu]", tid, k.fd, trace_id);
|
|
|
+
|
|
|
// 发送事件到用户空间 start
|
|
|
struct l7_event *e = bpf_map_lookup_elem(&l7_event_heap, &zero);
|
|
|
if (!e) {
|
|
|
@@ -561,7 +615,7 @@ int trace_enter_write(void *ctx, __u64 fd, __u16 is_tls, char *buf, __u64 size,
|
|
|
|
|
|
if (is_http_request(payload)) {
|
|
|
cw_bpf_debug("");
|
|
|
- cw_bpf_debug("-----[Kernel HTTP Enter]:pid:[%d]|CURRENT-GOID:[%llu]|FD:[%d]", tid, get_current_goroutine(), k.fd);
|
|
|
+// bpf_printk("[HTTP] Start tid:[%d] FD:[%d]", tid, get_current_goroutine(), k.fd);
|
|
|
__u8 type = 0;
|
|
|
__u64 trace_id = 0;
|
|
|
struct apm_trace_key_t trace_key = get_apm_trace_key(120 * NS_PER_SEC, true);
|
|
|
@@ -576,10 +630,25 @@ int trace_enter_write(void *ctx, __u64 fd, __u16 is_tls, char *buf, __u64 size,
|
|
|
trace_id = trace_info->trace_id;
|
|
|
type = trace_info->type;
|
|
|
}
|
|
|
+ bpf_printk(" [HTTP] [Start] tid:[%d] traceid:[%llu] payload:%s", tid, trace_id, payload);
|
|
|
+
|
|
|
+ // 检查当前线程是否有上下文
|
|
|
+// struct thread_ctx_t *current_ctx = bpf_map_lookup_elem(&thread_ctx, &tid);
|
|
|
+// if (current_ctx) {
|
|
|
+// bpf_printk(" [HTTP] has context: %u->%u token=%llu", tid, current_ctx->parent_tid, current_ctx->token);
|
|
|
+// bpf_printk(" [HTTP] has context: %u->%u token=%llu", tid, current_ctx->root_thread, current_ctx->token);
|
|
|
+// } else {
|
|
|
+// bpf_printk(" [HTTP] tid:%d has NO context", tid);
|
|
|
+// }
|
|
|
+
|
|
|
req->protocol = PROTOCOL_HTTP;
|
|
|
req->trace_id = trace_id;
|
|
|
+// bpf_printk(" [HTTP] [Start] tid:[%d] FD:[%d] traceid:[%llu]", tid, fd, trace_id);
|
|
|
+// bpf_printk(" [HTTP] [Start] payload:%s", payload);
|
|
|
+
|
|
|
// cw_bpf_debug("l7.c111 addr is --------:%d,%s",conn->sport,conn->saddr);
|
|
|
- struct apm_span_context * sc = cw_get_current_tracking_span(trace_info);
|
|
|
+ struct apm_span_context *sc = cw_get_current_tracking_span(trace_info, trace_key, req->assumed_app_id,
|
|
|
+ req->span_id);
|
|
|
if (sc) {
|
|
|
cw_copy_byte_arrays(sc->assumed_app_id, req->assumed_app_id, APM_ASSUMED_APP_ID_SIZE);
|
|
|
cw_copy_byte_arrays(sc->span_id, req->span_id, APM_SPAN_ID_SIZE);
|
|
|
@@ -610,8 +679,18 @@ int trace_enter_write(void *ctx, __u64 fd, __u16 is_tls, char *buf, __u64 size,
|
|
|
// }
|
|
|
req->protocol = PROTOCOL_POSTGRES;
|
|
|
} else if (is_redis_query(payload, size)) {
|
|
|
- cw_bpf_debug("[Enter][Redis]:TGID:%d|type:%s|FD:%d\n",k.pid,"type",k.fd);
|
|
|
+ __u32 read_seq = get_tcp_read_seq_from_fd(k.fd);
|
|
|
+ __u32 write_seq = get_tcp_write_seq_from_fd(k.fd);
|
|
|
+// bpf_printk(" [Enter][Redis]:s|FD:%d\n",k.pid,"type",k.fd);
|
|
|
+ bpf_printk(" -----------[Redis] [Start] tid:[%d] fd:[%d]", tid,k.fd);
|
|
|
+ bpf_printk(" -----------[Redis] [Start] read_seq:%d write_seq:%d size:%d", read_seq, write_seq, size);
|
|
|
+ __u64 trace_id = get_apm_trace_id(pid, tid);
|
|
|
+ bpf_printk(" -----------[Redis] [Start] traceid:[%llu]", trace_id);
|
|
|
+
|
|
|
+ req->trace_id = trace_id;
|
|
|
+
|
|
|
req->protocol = PROTOCOL_REDIS;
|
|
|
+
|
|
|
} else if (is_memcached_query(payload, size)) {
|
|
|
cw_bpf_debug("[Enter][MEMCACHE]:TGID:%d|type:%s|FD:%d\n",k.pid,"type",k.fd);
|
|
|
req->protocol = PROTOCOL_MEMCACHED;
|
|
|
@@ -860,7 +939,7 @@ int trace_exit_read_common(void *ctx, __u64 id, __u32 pid, __u16 is_tls, long in
|
|
|
// fd_trace_key = get_fd_trace_key(pid, k.fd);
|
|
|
//
|
|
|
// // trace info
|
|
|
- struct apm_trace_info_t trace_info = cw_save_trace_info(id,pid, k.fd);
|
|
|
+ struct apm_trace_info_t trace_info = cw_save_trace_info(id, pid, k.fd);
|
|
|
// __u64 uid_base = bpf_ktime_get_ns();
|
|
|
// trace_info.trace_id = bpf_get_current_pid_tgid() + uid_base;
|
|
|
|
|
|
@@ -868,10 +947,17 @@ int trace_exit_read_common(void *ctx, __u64 id, __u32 pid, __u16 is_tls, long in
|
|
|
e->trace_type = 0;
|
|
|
e->trace_id = trace_info.trace_id;
|
|
|
cw_bpf_debug("\n");
|
|
|
- cw_bpf_debug("[Trace Start in l7][HTTP]pid:[%d]--[%lld]--trace_id:%llu\n", pid, bpf_ktime_get_ns(),trace_info.trace_id);
|
|
|
+ bpf_printk("[Trace] [start] tid:[%d] trace_id:[%llu] fd:%d\n", pid, trace_info.trace_id, k.fd);
|
|
|
cw_bpf_debug("[Trace Start in l7][Receive][HTTP]tid:[%d]|GOID:[%d]|FD:%d\n", tid, trace_info.trace_key.goid,k.fd);
|
|
|
e->payload_size = ret;
|
|
|
COPY_PAYLOAD(e->payload, ret, payload);
|
|
|
+ __u32 read_seq = get_tcp_read_seq_from_fd(k.fd);
|
|
|
+ __u32 write_seq = get_tcp_write_seq_from_fd(k.fd);
|
|
|
+// bpf_printk(" [Enter][Redis]:s|FD:%d\n",k.pid,"type",k.fd);
|
|
|
+ bpf_printk(" -----------[Trace] [Start] read_seq:%d write_seq:%d size:%d", read_seq, write_seq, ret);
|
|
|
+
|
|
|
+ bpf_printk("[Trace] [start] tid:[%d] payload:%s",tid, payload);
|
|
|
+
|
|
|
|
|
|
// pid_tgid:trace_id
|
|
|
// thread_trace_key =
|
|
|
@@ -937,6 +1023,10 @@ int trace_exit_read_common(void *ctx, __u64 id, __u32 pid, __u16 is_tls, long in
|
|
|
struct l7_request *req = bpf_map_lookup_elem(&active_l7_requests, &k);
|
|
|
int response = 0;
|
|
|
if (!req) {
|
|
|
+ __u32 read_seq = get_tcp_read_seq_from_fd(k.fd);
|
|
|
+ __u32 write_seq = get_tcp_write_seq_from_fd(k.fd);
|
|
|
+ bpf_printk(" [Redis] [End] NO REQ: read_seq:%d write_seq:%d size:%d", read_seq, write_seq, ret);
|
|
|
+ bpf_printk(" [Redis] [End] NO REQ: fd=%d tid=%d", k.fd, tid);
|
|
|
// cw_bpf_debug("no req? 6:[0x%x] k.pid:%d, k.fd:%d",b[4],k.pid,k.fd);
|
|
|
if (is_dns_response(payload, ret, &k.stream_id, &e->status)) {
|
|
|
// cw_bpf_debug("dns");
|
|
|
@@ -1021,10 +1111,11 @@ int trace_exit_read_common(void *ctx, __u64 id, __u32 pid, __u16 is_tls, long in
|
|
|
// // 请求报文
|
|
|
// bpf_printk("[Response][HTTP222] req-payload:%s",e->payload);
|
|
|
// // 响应报文
|
|
|
-// cw_bpf_debug("[Response][HTTP222] resp-payload:%s",payload);
|
|
|
+ cw_bpf_debug("[Response][HTTP222] resp-payload:%s",payload);
|
|
|
|
|
|
response = is_http_response(payload, &e->status);
|
|
|
// cw_bpf_debug("[Kernel End][HTTP]:pid:[%d]|CURRENT-GOID:[%llu]|trace_id:[%llu]---------\n", tid, get_current_goroutine(),e->trace_id);
|
|
|
+ bpf_printk(" [HTTP] [End] tid:[%d] FD:[%d] traceid:[%llu]", tid, k.fd, e->trace_id);
|
|
|
|
|
|
} else if (e->protocol == PROTOCOL_POSTGRES) {
|
|
|
// __u64 trace_id = get_apm_trace_id(pid, tid);
|
|
|
@@ -1039,15 +1130,23 @@ int trace_exit_read_common(void *ctx, __u64 id, __u32 pid, __u16 is_tls, long in
|
|
|
e->method = METHOD_STATEMENT_PREPARE;
|
|
|
}
|
|
|
} else if (e->protocol == PROTOCOL_REDIS) {
|
|
|
+ e->trace_id = req->trace_id;
|
|
|
cw_bpf_debug("[Response][Redis]:TGID:%d|type:%s|FD:%d\n", k.pid, "", k.fd);
|
|
|
// __u64 trace_id = get_apm_trace_id(pid, tid);
|
|
|
// e->trace_id = req->trace_id;
|
|
|
- cw_bpf_debug("[Redis] trace_id:%llu", req->trace_id);
|
|
|
+// bpf_printk(" [Redis] trace_id:%llu", req->trace_id);
|
|
|
+ bpf_printk(" [Redis] [End] tid:[%d] fd:[%d] traceid:[%llu] ", tid, k.fd, req->trace_id);
|
|
|
+ __u32 read_seq = get_tcp_read_seq_from_fd(k.fd);
|
|
|
+ __u32 write_seq = get_tcp_write_seq_from_fd(k.fd);
|
|
|
+ bpf_printk(" [Redis] [End] read_seq:%d write_seq:%d size:%d", read_seq, write_seq, ret);
|
|
|
e->component_sport = conn->sport;
|
|
|
e->component_dport = conn->dport;
|
|
|
__builtin_memcpy(&e->component_saddr, &conn->saddr, sizeof(e->component_saddr));
|
|
|
__builtin_memcpy(&e->component_daddr, &conn->daddr, sizeof(e->component_daddr));
|
|
|
response = is_redis_response(payload, ret, &e->status, e->error_message);
|
|
|
+ if (!response) {
|
|
|
+ bpf_printk(" [Redis] [End] NOT VALID: fd=%d tid=%d", k.fd, tid);
|
|
|
+ }
|
|
|
} else if (e->protocol == PROTOCOL_MEMCACHED) {
|
|
|
cw_bpf_debug("[Response][MEMCACHE]:thread_id:%d\n", tid);
|
|
|
e->component_sport = conn->sport;
|
|
|
@@ -1265,10 +1364,12 @@ int trace_exit_read_common(void *ctx, __u64 id, __u32 pid, __u16 is_tls, long in
|
|
|
}
|
|
|
|
|
|
if (!response) {
|
|
|
+ bpf_printk(" [Redis] [End] RETURN EARLY: fd=%d tid=%d", k.fd, tid);
|
|
|
return 0;
|
|
|
}
|
|
|
if (e->trace_id == 0){
|
|
|
e->trace_id = get_apm_trace_id(pid,tid);
|
|
|
+ bpf_printk(" ttttttttttttttttttid=0 %llu",e->trace_id);
|
|
|
}
|
|
|
e->end_at = bpf_ktime_get_ns();
|
|
|
e->start_at = req->ns;
|
|
|
@@ -1431,7 +1532,512 @@ PROGTP(l7_http_trace_id)(void * ctx){
|
|
|
}
|
|
|
// 保存 trace_id 到psc
|
|
|
cw_save_parent_tracking_span(cw_parent_span_context);
|
|
|
- cw_bpf_debug("[Trace Start in l7][HTTP] trace_id:[%llu]\n", cw_parent_span_context->trace_id);
|
|
|
+ struct apm_trace_key_t trace_key = get_apm_trace_key(120 * NS_PER_SEC, true);
|
|
|
+// bpf_printk("[Trace save header] pid:[%d]\n", trace_key.pid);
|
|
|
// ---------- 在http请求入口生成 横向串联的trace_id end ----------
|
|
|
return 0;
|
|
|
-}
|
|
|
+}
|
|
|
+
|
|
|
+//
|
|
|
+//SEC("tracepoint/sched/sched_wakeup")
|
|
|
+//int handle_sched_wakeup(void *ctx)
|
|
|
+//{
|
|
|
+// __u64 waker_pid_tgid = bpf_get_current_pid_tgid();
|
|
|
+// __u32 waker_tid = (__u32)waker_pid_tgid; // 线程 ID
|
|
|
+// __u32 tgid = (__u32)(waker_pid_tgid >> 32); // 进程 ID
|
|
|
+// __u64 now = bpf_ktime_get_ns();
|
|
|
+//
|
|
|
+// if (load_filter_pid() != 0 && tgid != load_filter_pid())
|
|
|
+// return 0;
|
|
|
+//
|
|
|
+// struct tp_sched_wakeup args = {};
|
|
|
+// // 4.18 没有 bpf_probe_read_kernel,用 bpf_probe_read
|
|
|
+// if (bpf_probe_read(&args, sizeof(args), ctx) != 0)
|
|
|
+// return 0;
|
|
|
+//
|
|
|
+// __u32 wakee_tid = args.pid; // 被唤醒的tid
|
|
|
+//
|
|
|
+// bpf_printk("[sched_wakeup] %u -> %u", waker_tid, wakee_tid);
|
|
|
+//
|
|
|
+// // 特别关注线程 96415
|
|
|
+// if (wakee_tid == 96415) {
|
|
|
+// bpf_printk("[sched_wakeup] *** WAKEUP 96415 by %u ***", waker_tid);
|
|
|
+// }
|
|
|
+//
|
|
|
+// // 查找 waker 的上下文
|
|
|
+// struct thread_ctx_t *t = bpf_map_lookup_elem(&thread_ctx, &waker_tid);
|
|
|
+//
|
|
|
+// if (t && t->exp_ns > now) {
|
|
|
+// // waker 有有效上下文,传递给 wakee
|
|
|
+// struct thread_ctx_t copy = {};
|
|
|
+// copy.ts_ns = now;
|
|
|
+// copy.exp_ns = now + 3600ULL * 1000 * 1000; // 1小时
|
|
|
+// copy.tgid = tgid;
|
|
|
+// copy.token = t->token; // 继承 token
|
|
|
+// copy.parent_tid = waker_tid; // waker 成为父线程
|
|
|
+//
|
|
|
+// bpf_printk("[sched_wakeup] inherit: %u -> %u token=%llu (from waker)", waker_tid, wakee_tid, t->token);
|
|
|
+//
|
|
|
+// // 保存到 wakee
|
|
|
+// bpf_map_update_elem(&thread_ctx, &wakee_tid, ©, BPF_ANY);
|
|
|
+// } else {
|
|
|
+// // waker 没有有效上下文,为 wakee 创建新上下文
|
|
|
+// struct thread_ctx_t copy = {};
|
|
|
+// copy.ts_ns = now;
|
|
|
+// copy.exp_ns = now + 3600ULL * 1000 * 1000; // 1小时
|
|
|
+// copy.tgid = tgid;
|
|
|
+// copy.token = waker_pid_tgid + now; // 生成新的token
|
|
|
+// copy.parent_tid = waker_tid; // waker 成为父线程
|
|
|
+//
|
|
|
+// bpf_printk("[sched_wakeup] create: %u -> %u token=%llu (new token)", waker_tid, wakee_tid, copy.token);
|
|
|
+// if (t) {
|
|
|
+// bpf_printk("[sched_wakeup] waker %u context expired: exp_ns=%llu now=%llu", waker_tid, t->exp_ns, now);
|
|
|
+// } else {
|
|
|
+// bpf_printk("[sched_wakeup] waker %u has no context", waker_tid);
|
|
|
+// }
|
|
|
+//
|
|
|
+// // 保存到 wakee
|
|
|
+// bpf_map_update_elem(&thread_ctx, &wakee_tid, ©, BPF_ANY);
|
|
|
+// }
|
|
|
+//
|
|
|
+// return 0;
|
|
|
+//}
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+// ---------- 小工具 ----------
|
|
|
+static __always_inline bool ctx_valid(const struct thread_ctx_t *c, __u64 now)
|
|
|
+{
|
|
|
+ return c && c->exp_ns > now;
|
|
|
+}
|
|
|
+
|
|
|
+static __always_inline void ctx_from_parent(struct thread_ctx_t *dst,
|
|
|
+ const struct thread_ctx_t *src,
|
|
|
+ __u32 parent_tid,
|
|
|
+ __u32 tgid,
|
|
|
+ __u64 now)
|
|
|
+{
|
|
|
+ // 继承 token,刷新时间窗
|
|
|
+ dst->token = src->token;
|
|
|
+ dst->ts_ns = now;
|
|
|
+ dst->exp_ns = now + ms_to_ns(TTL_MS);
|
|
|
+ dst->tgid = tgid;
|
|
|
+ dst->parent_tid = parent_tid;
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
+// ---------- sched_wakeup:只“建边” ----------
|
|
|
+/*
|
|
|
+SEC("tracepoint/sched/sched_wakeup")
|
|
|
+int on_sched_wakeup(void *ctx)
|
|
|
+{
|
|
|
+ __u64 tnow = bpf_ktime_get_ns();
|
|
|
+ __u64 pidtgid = bpf_get_current_pid_tgid();
|
|
|
+ __u32 waker_tid = (__u32)pidtgid;
|
|
|
+ __u32 waker_tgid = (__u32)(pidtgid >> 32);
|
|
|
+ __u64 trace_id = 0;
|
|
|
+ if (load_filter_pid() != 0 && waker_tgid != load_filter_pid())
|
|
|
+ return 0;
|
|
|
+
|
|
|
+ // 直接从 tracepoint 上下文中读取 pid 字段 (offset:24)
|
|
|
+ __u32 wakee_tid = 0;
|
|
|
+ if (bpf_probe_read(&wakee_tid, sizeof(wakee_tid), (void *)(ctx + 24)))
|
|
|
+ return 0;
|
|
|
+
|
|
|
+ bpf_printk("\n [sched_wakeup] [Start] waker=%u wakee=%u",
|
|
|
+ waker_tid, wakee_tid);// [sched_wakeup] [Start] waker=117427 wakee=117425
|
|
|
+
|
|
|
+ struct thread_ctx_t *wakee_ctx = bpf_map_lookup_elem(&thread_ctx, &wakee_tid); // 异步被唤醒线程
|
|
|
+ if (wakee_ctx) {
|
|
|
+ if (wakee_ctx->is_main_thread == 1) {
|
|
|
+ __u16 level = wakee_ctx->level;
|
|
|
+ trace_id = wakee_ctx->token;
|
|
|
+ struct thread_ctx_t new_ctx = {};
|
|
|
+ new_ctx.token = trace_id;
|
|
|
+ new_ctx.ts_ns = tnow;
|
|
|
+ new_ctx.exp_ns = tnow + 30ULL * 1000 * 1000;
|
|
|
+ new_ctx.tgid = waker_tgid;
|
|
|
+ new_ctx.parent_tid = wakee_tid;
|
|
|
+ new_ctx.root_thread = wakee_ctx->root_thread;
|
|
|
+ new_ctx.level = level + 1;
|
|
|
+ new_ctx.trace_key = wakee_ctx->trace_key;
|
|
|
+
|
|
|
+ // key是否是主线程
|
|
|
+ struct thread_ctx_t *waker_ctx = bpf_map_lookup_elem(&thread_ctx, &waker_tid);
|
|
|
+ if (waker_ctx && waker_ctx->is_main_thread == 1) {
|
|
|
+ new_ctx.is_main_thread = waker_ctx->is_main_thread;
|
|
|
+ }
|
|
|
+ bpf_map_update_elem(&thread_ctx, &waker_tid, &new_ctx, BPF_ANY);
|
|
|
+ bpf_printk(" [sched_wakeup] [save] [wakee_ctx] key(%u)->val(%u) main(%d)", waker_tid, wakee_tid,
|
|
|
+ wakee_ctx->is_main_thread);
|
|
|
+ } else{
|
|
|
+ bpf_printk(" [sched_wakeup] [not save] [wakee_ctx] wakee_tid(%d) is_main(%d)", wakee_tid,
|
|
|
+ wakee_ctx->is_main_thread);// [sched_wakeup] [not save] [wakee_ctx] wakee_tid(117425) is_main(0)
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ bpf_printk(" [sched_wakeup] [not find prev_ctx] wakee_tid(%u)",wakee_tid);
|
|
|
+// struct thread_ctx_t *waker_ctx = bpf_map_lookup_elem(&thread_ctx, &waker_tid); // 主线程唤醒
|
|
|
+ }
|
|
|
+
|
|
|
+ struct thread_ctx_t *waker_ctx = bpf_map_lookup_elem(&thread_ctx, &waker_tid); // 异步被唤醒线程
|
|
|
+ if (waker_ctx) {
|
|
|
+ if (waker_ctx->is_main_thread == 1) {
|
|
|
+ __u16 level = waker_ctx->level;
|
|
|
+ trace_id = waker_ctx->token;
|
|
|
+ struct thread_ctx_t new_ctx = {};
|
|
|
+ new_ctx.token = trace_id;
|
|
|
+ new_ctx.ts_ns = tnow;
|
|
|
+ new_ctx.exp_ns = tnow + 300ULL * 1000 * 1000;
|
|
|
+ new_ctx.tgid = waker_tgid;
|
|
|
+ new_ctx.parent_tid = waker_tid;
|
|
|
+ new_ctx.root_thread = waker_ctx->root_thread;
|
|
|
+ new_ctx.level = level + 1;
|
|
|
+ new_ctx.trace_key = waker_ctx->trace_key;
|
|
|
+ // key是否是主线程
|
|
|
+// struct thread_ctx_t *wakee_ctx = bpf_map_lookup_elem(&thread_ctx, &wakee_tid);
|
|
|
+ if (wakee_ctx && wakee_ctx->is_main_thread == 1) {
|
|
|
+ new_ctx.is_main_thread = wakee_ctx->is_main_thread;
|
|
|
+ }
|
|
|
+ bpf_map_update_elem(&thread_ctx, &wakee_tid, &new_ctx, BPF_ANY);
|
|
|
+ bpf_printk(" [sched_wakeup] [save] [waker_ctx] key(%u)->val(%u) main(%d)", wakee_tid, waker_tid,
|
|
|
+ waker_ctx->is_main_thread);
|
|
|
+ } else{
|
|
|
+ bpf_printk(" [sched_wakeup] [not save] [waker_ctx] waker_tid(%d) is_main(%d)", waker_tid,
|
|
|
+ waker_ctx->is_main_thread);
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ bpf_printk(" [sched_wakeup] [waker_ctx] [not find prev_ctx] tid:%u",waker_tid);
|
|
|
+ }
|
|
|
+
|
|
|
+// [sched_wakeup] [End] new edge: waker=117427 -> wakee=117425 trace_id=0
|
|
|
+ bpf_printk("[sched_wakeup] [End] new edge: waker=%u -> wakee=%u trace_id=%llu",
|
|
|
+ waker_tid, wakee_tid, trace_id);
|
|
|
+
|
|
|
+ return 0;
|
|
|
+//
|
|
|
+// struct thread_ctx_t *pt = bpf_map_lookup_elem(&thread_ctx, &waker_tid);
|
|
|
+// if (!pt || pt->exp_ns <= tnow) {
|
|
|
+// bpf_printk("[sched_wakeup] skip: waker tid=%u no valid ctx (tgid=%u)",
|
|
|
+// waker_tid, waker_tgid);
|
|
|
+// return 0;
|
|
|
+// }
|
|
|
+//
|
|
|
+// // 查找旧边
|
|
|
+// struct edge_t *old = bpf_map_lookup_elem(&wake_edge, &wakee_tid);
|
|
|
+// if (old && old->exp_ns > tnow) {
|
|
|
+// bpf_printk("[sched_wakeup] skip: wakee tid=%u already has edge (parent=%u)",
|
|
|
+// wakee_tid, old->parent_tid);
|
|
|
+// return 0;
|
|
|
+// }
|
|
|
+//
|
|
|
+// if (old && old->exp_ns <= tnow)
|
|
|
+// bpf_map_delete_elem(&wake_edge, &wakee_tid);
|
|
|
+//
|
|
|
+// // 新建边
|
|
|
+// struct edge_t ne = {};
|
|
|
+// ne.token = pt->token;
|
|
|
+// ne.parent_tid = waker_tid;
|
|
|
+// ne.ts_ns = tnow;
|
|
|
+// ne.exp_ns = tnow + 3600ULL * 1000 * 1000; // 1小时
|
|
|
+//
|
|
|
+// bpf_map_update_elem(&wake_edge, &wakee_tid, &ne, BPF_ANY);
|
|
|
+// bpf_printk("[sched_wakeup] new edge: waker=%u (tgid=%u) -> wakee=%u",
|
|
|
+// waker_tid, waker_tgid, wakee_tid);
|
|
|
+//
|
|
|
+// bpf_printk("[sched_wakeup] token=%llu",
|
|
|
+// ne.token);
|
|
|
+
|
|
|
+
|
|
|
+ return 0;
|
|
|
+}
|
|
|
+
|
|
|
+ */
|
|
|
+
|
|
|
+// ---------- sched_switch:将“边”落地为 thread_ctx[next] ----------
|
|
|
+SEC("tracepoint/sched/sched_switch")
|
|
|
+int on_sched_switch(void *ctx) {
|
|
|
+ __u64 tnow = bpf_ktime_get_ns();
|
|
|
+ __u32 prev_tid = 0;
|
|
|
+ __u32 next_tid = 0;
|
|
|
+ __u64 trace_id = 0;
|
|
|
+ // 直接从 tracepoint 上下文中读取字段
|
|
|
+ // prev_pid: offset:24, next_pid: offset:56
|
|
|
+
|
|
|
+ if (bpf_probe_read(&prev_tid, sizeof(prev_tid), (void *)(ctx + 24)))
|
|
|
+ return 0;
|
|
|
+ if (bpf_probe_read(&next_tid, sizeof(next_tid), (void *)(ctx + 56)))
|
|
|
+ return 0;
|
|
|
+ __u32 tgid = (__u32) (bpf_get_current_pid_tgid() >> 32);
|
|
|
+ if (load_filter_pid() != 0 && tgid != load_filter_pid())
|
|
|
+ return 0;
|
|
|
+ if (next_tid == 0) return 0;
|
|
|
+
|
|
|
+// bpf_printk("[sched_switch] [Start] prev_tid=%u -> next_tid=%u;",
|
|
|
+// prev_tid, next_tid);
|
|
|
+
|
|
|
+ struct thread_ctx_t *prev_ctx = bpf_map_lookup_elem(&thread_ctx, &prev_tid);
|
|
|
+ if (prev_ctx) {
|
|
|
+ if (prev_ctx->is_main_thread == 1) {
|
|
|
+ trace_id = prev_ctx->token;
|
|
|
+ struct thread_ctx_t new_ctx = {};
|
|
|
+ __u16 level = prev_ctx->level;
|
|
|
+ new_ctx.token = prev_ctx->token;
|
|
|
+ new_ctx.ts_ns = tnow;
|
|
|
+ new_ctx.exp_ns = tnow + 30ULL * 1000 * 1000;
|
|
|
+ new_ctx.tgid = tgid;
|
|
|
+ new_ctx.parent_tid = prev_tid;
|
|
|
+ new_ctx.root_thread = prev_ctx->root_thread;
|
|
|
+ new_ctx.level = level + 1;
|
|
|
+ new_ctx.trace_key = prev_ctx->trace_key;
|
|
|
+ bpf_map_update_elem(&thread_ctx, &next_tid, &new_ctx, BPF_ANY);
|
|
|
+ bpf_printk(" [sched_switch] [save] %u->%u level(%d)", next_tid, prev_tid, new_ctx.level);
|
|
|
+// bpf_printk(" [sched_switch] [save] key:pid(%d)", new_ctx.trace_key.pid);
|
|
|
+ } else {
|
|
|
+// bpf_printk(" [sched_switch] [no save] next_tid(%d) parent_tid(%u)", next_tid, prev_tid);
|
|
|
+// bpf_printk(" [sched_switch] [no save] is_main_thread(%d) level(%u) trace_id=%llu", prev_ctx->is_main_thread, prev_ctx->level,prev_ctx->token);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ bpf_printk("[sched_switch] [End] prev_tid=%u -> next_tid=%u trace=%llu",
|
|
|
+ prev_tid, next_tid,trace_id);
|
|
|
+ return 0;
|
|
|
+
|
|
|
+
|
|
|
+ struct edge_t *e = bpf_map_lookup_elem(&wake_edge, &next_tid);
|
|
|
+ if (!e || e->exp_ns <= tnow) {
|
|
|
+ if (e)
|
|
|
+ bpf_map_delete_elem(&wake_edge, &next_tid);
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
+
|
|
|
+ // 继承 token
|
|
|
+ struct thread_ctx_t new_ctx = {};
|
|
|
+ new_ctx.token = e->token;
|
|
|
+ new_ctx.ts_ns = tnow;
|
|
|
+ new_ctx.exp_ns = tnow + 36ULL * 1000 * 1000; // 1小时
|
|
|
+ new_ctx.tgid = tgid;
|
|
|
+ new_ctx.parent_tid = e->parent_tid;
|
|
|
+
|
|
|
+ bpf_map_update_elem(&thread_ctx, &next_tid, &new_ctx, BPF_ANY);
|
|
|
+ bpf_map_delete_elem(&wake_edge, &next_tid);
|
|
|
+
|
|
|
+ bpf_printk("[sched_switch] inherit: next=%u <- parent=%u (tgid=%u)",
|
|
|
+ next_tid, new_ctx.parent_tid, tgid);
|
|
|
+ bpf_printk("[sched_switch] token=%llu",
|
|
|
+ new_ctx.token);
|
|
|
+ // 额外调试信息
|
|
|
+ bpf_printk("[sched_switch] prev_tid=%u -> next_tid=%u; parent=%u",
|
|
|
+ prev_tid, next_tid, new_ctx.parent_tid);
|
|
|
+
|
|
|
+ return 0;
|
|
|
+}
|
|
|
+
|
|
|
+static __always_inline __u64 key64(__u32 tgid, __u32 fd){
|
|
|
+ return ((__u64)tgid<<32) | fd;
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
+#define EPOLL_CTL_ADD 1
|
|
|
+
|
|
|
+// 1) 记录 epoll_ctl 加入的 eventfd 归属
|
|
|
+SEC("tracepoint/syscalls/sys_enter_epoll_ctl")
|
|
|
+int on_epoll_ctl(struct trace_event_raw_sys_enter__stub *ctx) {
|
|
|
+ __u32 tgid = bpf_get_current_pid_tgid() >> 32;
|
|
|
+
|
|
|
+ if (load_filter_pid() != 0 && tgid != load_filter_pid())
|
|
|
+ return 0;
|
|
|
+
|
|
|
+ int epfd = ctx->args[0];
|
|
|
+ int op = ctx->args[1];
|
|
|
+ int fd = ctx->args[2];
|
|
|
+// if (op != EPOLL_CTL_ADD) return 0;
|
|
|
+
|
|
|
+ // 这里简单记录,若要更严谨,可先判定 fd 是 eventfd(如在 write(eventfd) 时回填)
|
|
|
+// __u64 k = key64(tgid, fd);
|
|
|
+ __u32 tid = (__u32)bpf_get_current_pid_tgid();
|
|
|
+// bpf_map_update_elem(&eventfd2owner, &k, &tid, BPF_ANY);
|
|
|
+
|
|
|
+ bpf_printk("[eventfd2owner] epfd=%llu fd=%d owner_tid=%u", epfd, fd, tid);
|
|
|
+ return 0;
|
|
|
+}
|
|
|
+
|
|
|
+SEC("tracepoint/syscalls/sys_enter_epoll_pwait")
|
|
|
+int trace_epoll_wait(struct trace_event_raw_sys_enter__stub *ctx)
|
|
|
+{
|
|
|
+ __u64 pidtgid = bpf_get_current_pid_tgid();
|
|
|
+ __u32 tgid = pidtgid >> 32;
|
|
|
+ __u32 tid = pidtgid;
|
|
|
+ if (load_filter_pid() != 0 && tgid != load_filter_pid())
|
|
|
+ return 0;
|
|
|
+ int epfd = ctx->args[0];
|
|
|
+ struct epoll_event *events = (struct epoll_event *)ctx->args[1];
|
|
|
+ int maxevents = ctx->args[2];
|
|
|
+ int timeout = ctx->args[3];
|
|
|
+//
|
|
|
+ bpf_printk("[epoll_wait-enter] tgid=%u tid=%u epfd=%d",tgid, tid, epfd);
|
|
|
+ return 0;
|
|
|
+}
|
|
|
+
|
|
|
+SEC("tracepoint/syscalls/sys_exit_epoll_pwait")
|
|
|
+int trace_exit_epoll_pwait(struct trace_event_raw_sys_exit__stub *ctx)
|
|
|
+{
|
|
|
+ __u64 pid_tgid = bpf_get_current_pid_tgid();
|
|
|
+ __u32 tid = (__u32)pid_tgid;
|
|
|
+ __u32 tgid = (__u32)(pid_tgid >> 32);
|
|
|
+ int ret = ctx->ret;
|
|
|
+ if (load_filter_pid() != 0 && tgid != load_filter_pid())
|
|
|
+ return 0;
|
|
|
+ if (ret > 0) {
|
|
|
+ // epoll_wait 返回 ret 个就绪事件
|
|
|
+ bpf_printk("[epoll_wait_exit] tid=%u wake ret=%d", tid, ret);
|
|
|
+ }
|
|
|
+ return 0;
|
|
|
+}
|
|
|
+
|
|
|
+
|
|
|
+//
|
|
|
+////// 3) lettuce 线程领取 trace(在 epoll_wait 返回后)
|
|
|
+//SEC("tracepoint/syscalls/sys_enter_epoll_wait")
|
|
|
+//int on_epoll_wait_exit(struct trace_event_raw_sys_enter__stub *ctx) {
|
|
|
+// __u32 tgid = bpf_get_current_pid_tgid() >> 32;
|
|
|
+// int epfd = ctx->args[0];
|
|
|
+// int op = ctx->args[1];
|
|
|
+// int fd = ctx->args[2];
|
|
|
+//// if (op != EPOLL_CTL_ADD) return 0;
|
|
|
+//
|
|
|
+// // 这里简单记录,若要更严谨,可先判定 fd 是 eventfd(如在 write(eventfd) 时回填)
|
|
|
+//// __u64 k = key64(tgid, fd);
|
|
|
+// __u32 tid = (__u32)bpf_get_current_pid_tgid();
|
|
|
+//// bpf_map_update_elem(&eventfd2owner, &k, &tid, BPF_ANY);
|
|
|
+//
|
|
|
+// bpf_printk("[sys_enter_epoll_wait] epfd=%llu fd=%d owner_tid=%u", epfd, fd, tid);
|
|
|
+// return 0;
|
|
|
+//}
|
|
|
+
|
|
|
+// __u32 tid = (__u32)bpf_get_current_pid_tgid();
|
|
|
+// __u64 *t = bpf_map_lookup_elem(&pending_trace, &tid);
|
|
|
+// if (!t) return 0;
|
|
|
+//
|
|
|
+// struct thread_ctx_t ctx_new = {};
|
|
|
+// ctx_new.token = *t;
|
|
|
+// ctx_new.ts_ns = bpf_ktime_get_ns();
|
|
|
+// bpf_map_update_elem(&thread_ctx, &tid, &ctx_new, BPF_ANY);
|
|
|
+// bpf_map_delete_elem(&pending_trace, &tid);
|
|
|
+//
|
|
|
+// bpf_printk("[pickup] tid=%u trace=%llu", tid, ctx_new.trace_id);
|
|
|
+// return 0;
|
|
|
+//}
|
|
|
+
|
|
|
/*
 * Convert a millisecond count to nanoseconds. The argument is token-pasted
 * with ULL, so it must be a plain integer literal (not an expression or
 * another macro).
 */
#define MS_TO_NSS(x) ((x##ULL) * 1000 * 1000)

/*
 * Context lifetimes passed to clone_ctx_and_update(), which adds them
 * unchanged to a nanosecond timestamp. Despite the _MS suffix, both values
 * are therefore expressed in NANOSECONDS.
 *
 * EXP_WAKER_MS used to be the bare number 300, which gave a 300 ns lifetime
 * instead of 300 ms. Both macros are now fully parenthesized so they are
 * safe inside larger expressions.
 */
#define EXP_WAKEE_MS MS_TO_NSS(30)    /* 30 ms  */
#define EXP_WAKER_MS MS_TO_NSS(300)   /* 300 ms */
|
|
|
+
|
|
|
+static __always_inline void clone_ctx_and_update(__u32 dst_tid,
|
|
|
+ const struct thread_ctx_t *src,
|
|
|
+ const struct thread_ctx_t *dst_ctx, // 👈 新增:目标现有ctx
|
|
|
+ __u32 parent_tid,
|
|
|
+ __u32 tgid,
|
|
|
+ __u64 now_ns,
|
|
|
+ __u64 exp_ms)
|
|
|
+{
|
|
|
+ if (!src) return;
|
|
|
+
|
|
|
+ if (src->is_main_thread != 1) {
|
|
|
+ bpf_printk(" [sched_wakeup] [not save] src_tid(%u) is_main(%d)",
|
|
|
+ parent_tid, src->is_main_thread);
|
|
|
+ return;
|
|
|
+ }
|
|
|
+
|
|
|
+ struct thread_ctx_t new_ctx = {};
|
|
|
+ new_ctx.token = src->token;
|
|
|
+ new_ctx.ts_ns = now_ns;
|
|
|
+ new_ctx.exp_ns = now_ns + exp_ms;
|
|
|
+ new_ctx.tgid = tgid;
|
|
|
+ new_ctx.parent_tid = parent_tid;
|
|
|
+ new_ctx.root_thread = src->root_thread;
|
|
|
+ new_ctx.level = src->level + 1;
|
|
|
+ new_ctx.trace_key = src->trace_key;
|
|
|
+
|
|
|
+ // 只有当“目标key已是主线程”才置1;否则保持默认0
|
|
|
+ if (dst_ctx && dst_ctx->is_main_thread == 1) {
|
|
|
+ new_ctx.is_main_thread = 1;
|
|
|
+ }
|
|
|
+
|
|
|
+ bpf_map_update_elem(&thread_ctx, &dst_tid, &new_ctx, BPF_ANY);
|
|
|
+ bpf_printk(" [sched_wakeup] [save] key(%u)->val(%u) new-main(%d)",
|
|
|
+ dst_tid, parent_tid, new_ctx.is_main_thread);
|
|
|
+}
|
|
|
+
|
|
|
+SEC("tracepoint/sched/sched_wakeup")
|
|
|
+int tp_sched_wakeup(void *ctx)
|
|
|
+{
|
|
|
+ __u64 tnow = bpf_ktime_get_ns();
|
|
|
+ __u64 pidtgid = bpf_get_current_pid_tgid();
|
|
|
+ __u32 waker_tid = (__u32)pidtgid;
|
|
|
+ __u32 waker_tgid= (__u32)(pidtgid >> 32);
|
|
|
+
|
|
|
+ if (load_filter_pid() != 0 && waker_tgid != load_filter_pid())
|
|
|
+ return 0;
|
|
|
+
|
|
|
+// __u32 wakee_tid = ctx->pid;
|
|
|
+
|
|
|
+ __u32 wakee_tid = 0;
|
|
|
+ if (bpf_probe_read(&wakee_tid, sizeof(wakee_tid), (void *)(ctx + 24)))
|
|
|
+ return 0;
|
|
|
+
|
|
|
+ bpf_printk("\n");
|
|
|
+ bpf_printk("[sched_wakeup] [Start] waker=%u wakee=%u", waker_tid, wakee_tid);
|
|
|
+
|
|
|
+ struct thread_ctx_t *wakee_ctx = bpf_map_lookup_elem(&thread_ctx, &wakee_tid);
|
|
|
+ struct thread_ctx_t *waker_ctx = bpf_map_lookup_elem(&thread_ctx, &waker_tid);
|
|
|
+
|
|
|
+ // 🧠 新增逻辑:如果双方都存在且都为主线程,则跳过 update
|
|
|
+ if (wakee_ctx && waker_ctx &&
|
|
|
+ wakee_ctx->is_main_thread == 1 && waker_ctx->is_main_thread == 1) {
|
|
|
+ bpf_printk(" [sched_wakeup] [skip update] both main threads (waker=%u wakee=%u)",
|
|
|
+ waker_tid, wakee_tid);
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
+
|
|
|
+ // 路径1:wakee -> waker(目标是 waker,需要把 waker_ctx 传入用于 main 继承判断)
|
|
|
+ clone_ctx_and_update(/*dst_tid=*/waker_tid,
|
|
|
+ /*src=*/wakee_ctx,
|
|
|
+ /*dst_ctx=*/waker_ctx, // 👈 用目标现有ctx决定是否置 main
|
|
|
+ /*parent_tid=*/wakee_tid,
|
|
|
+ /*tgid=*/waker_tgid,
|
|
|
+ /*now_ns=*/tnow,
|
|
|
+ /*exp_ms=*/EXP_WAKEE_MS);
|
|
|
+
|
|
|
+
|
|
|
+ clone_ctx_and_update(/*dst_tid=*/wakee_tid,
|
|
|
+ /*src=*/waker_ctx,
|
|
|
+ /*dst_ctx=*/wakee_ctx, // 👈 同上
|
|
|
+ /*parent_tid=*/waker_tid,
|
|
|
+ /*tgid=*/waker_tgid,
|
|
|
+ /*now_ns=*/tnow,
|
|
|
+ /*exp_ms=*/EXP_WAKER_MS);
|
|
|
+
|
|
|
+// // wakee 为主线程 -> 派生给 waker
|
|
|
+// clone_ctx_and_update(waker_tid, wakee_ctx, wakee_tid,
|
|
|
+// waker_tgid, tnow, 30ULL * 1000 * 1000);
|
|
|
+//
|
|
|
+// // waker 为主线程 -> 派生给 wakee
|
|
|
+// clone_ctx_and_update(wakee_tid, waker_ctx, waker_tid,
|
|
|
+// waker_tgid, tnow, 30ULL * 1000 * 1000);
|
|
|
+
|
|
|
+
|
|
|
+ __u64 trace_id = 0;
|
|
|
+ if (wakee_ctx && wakee_ctx->is_main_thread == 1)
|
|
|
+ trace_id = wakee_ctx->token;
|
|
|
+ else if (waker_ctx && waker_ctx->is_main_thread == 1)
|
|
|
+ trace_id = waker_ctx->token;
|
|
|
+
|
|
|
+ if (!wakee_ctx)
|
|
|
+ bpf_printk(" [sched_wakeup] [not find prev_ctx] wakee_tid(%u)", wakee_tid);
|
|
|
+ if (!waker_ctx)
|
|
|
+ bpf_printk(" [sched_wakeup] [waker_ctx] [not find prev_ctx] tid:%u", waker_tid);
|
|
|
+
|
|
|
+ bpf_printk("[sched_wakeup] [End] new edge: waker=%u -> wakee=%u trace_id=%llu",
|
|
|
+ waker_tid, wakee_tid, trace_id);
|
|
|
+ return 0;
|
|
|
+}
|