|
|
@@ -11,6 +11,7 @@ import (
|
|
|
"path/filepath"
|
|
|
"strconv"
|
|
|
"strings"
|
|
|
+ "sync"
|
|
|
"time"
|
|
|
|
|
|
"github.com/coroot/coroot-node-agent/utils"
|
|
|
@@ -19,6 +20,7 @@ import (
|
|
|
"github.com/cilium/ebpf"
|
|
|
"github.com/cilium/ebpf/link"
|
|
|
"github.com/cilium/ebpf/perf"
|
|
|
+ "github.com/coroot/coroot-node-agent/cgroup"
|
|
|
"github.com/coroot/coroot-node-agent/common"
|
|
|
"github.com/coroot/coroot-node-agent/ebpftracer/l7"
|
|
|
"github.com/coroot/coroot-node-agent/ebpftracer/tracer"
|
|
|
@@ -154,6 +156,14 @@ type Tracer struct {
|
|
|
Symbols []debugelf.Symbol
|
|
|
Uprobes []tracer.Uprobe
|
|
|
UprobesMap map[string]tracer.Uprobe
|
|
|
+
|
|
|
+ cgroupLinksMu sync.Mutex
|
|
|
+ cgroupLinks map[string]*cgroupLinkState
|
|
|
+}
|
|
|
+
|
|
|
+type cgroupLinkState struct {
|
|
|
+ refCount int
|
|
|
+ links []link.Link
|
|
|
}
|
|
|
|
|
|
func NewTracer(kernelVersion string, disableL7Tracing, disableE2ETracing, disableStackTracing bool) *Tracer {
|
|
|
@@ -172,6 +182,8 @@ func NewTracer(kernelVersion string, disableL7Tracing, disableE2ETracing, disabl
|
|
|
readers: map[string]*perf.Reader{},
|
|
|
uprobes: map[string]*ebpf.Program{},
|
|
|
links: []link.Link{},
|
|
|
+
|
|
|
+ cgroupLinks: map[string]*cgroupLinkState{},
|
|
|
}
|
|
|
}
|
|
|
|
|
|
@@ -198,6 +210,17 @@ func (t *Tracer) Close() {
|
|
|
klog.WithError(err).Infof("[close] links")
|
|
|
}
|
|
|
}
|
|
|
+ t.cgroupLinksMu.Lock()
|
|
|
+ for cgPath, state := range t.cgroupLinks {
|
|
|
+ for _, l := range state.links {
|
|
|
+ if l != nil {
|
|
|
+ err := l.Close()
|
|
|
+ klog.WithError(err).Infof("[close] cgroup links %s", cgPath)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ t.cgroupLinks = map[string]*cgroupLinkState{}
|
|
|
+ t.cgroupLinksMu.Unlock()
|
|
|
for k, r := range t.readers {
|
|
|
if r != nil {
|
|
|
err := r.Close()
|
|
|
@@ -479,74 +502,13 @@ func (t *Tracer) LinkEbpfProg() error {
|
|
|
|
|
|
case ebpf.SockOps:
|
|
|
klog.Infof("Processing SockOps program: %s", programSpec.SectionName)
|
|
|
- // 获取sockops程序
|
|
|
- sockopsProg, exists := t.collection.Programs["sockops_cb"]
|
|
|
- if !exists {
|
|
|
- klog.Errorf("sockops_cb program not found")
|
|
|
- continue
|
|
|
- }
|
|
|
-
|
|
|
- // 清理旧的cgroup(可能残留上次agent运行的PID)
|
|
|
- cgroupPath := "/sys/fs/cgroup/ebpf-sockops"
|
|
|
- cleanupCgroup(cgroupPath)
|
|
|
-
|
|
|
- // 重新创建cgroup路径
|
|
|
- if err := os.MkdirAll(cgroupPath, 0755); err != nil {
|
|
|
- klog.Errorf("Failed to create cgroup path: %v", err)
|
|
|
- continue
|
|
|
- }
|
|
|
-
|
|
|
- // 从环境变量获取要监控的PID
|
|
|
- filterPidStr := os.Getenv("FILTER_PID")
|
|
|
- if filterPidStr == "" {
|
|
|
- klog.Warnf("FILTER_PID environment variable not set, using current process")
|
|
|
- filterPidStr = fmt.Sprint(os.Getpid())
|
|
|
- }
|
|
|
-
|
|
|
- // 将指定PID添加到cgroup
|
|
|
- if err := os.WriteFile(filepath.Join(cgroupPath, "cgroup.procs"), []byte(filterPidStr), 0644); err != nil {
|
|
|
- klog.Errorf("Failed to add process %s to cgroup: %v", filterPidStr, err)
|
|
|
- continue
|
|
|
- }
|
|
|
- klog.Infof("Added process %s to cgroup for monitoring", filterPidStr)
|
|
|
-
|
|
|
- // 附加sockops程序到cgroup
|
|
|
- l, err = link.AttachCgroup(link.CgroupOptions{
|
|
|
- Path: cgroupPath,
|
|
|
- Program: sockopsProg,
|
|
|
- Attach: ebpf.AttachCGroupSockOps,
|
|
|
- })
|
|
|
- if err != nil {
|
|
|
- klog.Errorf("Failed to attach sockops program: %v", err)
|
|
|
- continue
|
|
|
- }
|
|
|
- klog.Infof("Successfully attached sockops program to cgroup: %s", cgroupPath)
|
|
|
+ klog.Infof("Deferring sockops cgroup program linking until a business cgroup is requested")
|
|
|
+ continue
|
|
|
|
|
|
case ebpf.CGroupSKB:
|
|
|
klog.Infof("Processing CGroupSKB program: %s", programSpec.SectionName)
|
|
|
- // 处理cgroup/skb程序
|
|
|
- if programSpec.SectionName == "cgroup/skb" {
|
|
|
- cgroupSkbProg, exists := t.collection.Programs["http_request_handler"]
|
|
|
- if !exists {
|
|
|
- klog.Errorf("http_request_handler program not found")
|
|
|
- continue
|
|
|
- }
|
|
|
-
|
|
|
- // 复用sockops阶段已创建的cgroup路径
|
|
|
- cgroupPath := "/sys/fs/cgroup/ebpf-sockops"
|
|
|
-
|
|
|
- // 附加cgroup/skb程序到cgroup
|
|
|
- l, err = link.AttachCgroup(link.CgroupOptions{
|
|
|
- Path: cgroupPath,
|
|
|
- Program: cgroupSkbProg,
|
|
|
- Attach: ebpf.AttachCGroupInetEgress,
|
|
|
- })
|
|
|
- if err != nil {
|
|
|
- klog.Errorf("Failed to attach cgroup/skb program: %v", err)
|
|
|
- continue
|
|
|
- }
|
|
|
- klog.Infof("Successfully attached cgroup/skb program to cgroup: %s", cgroupPath)
|
|
|
- }
|
|
|
+ klog.Infof("Deferring cgroup/skb program linking until a business cgroup is requested")
|
|
|
+ continue
|
|
|
|
|
|
case ebpf.SkMsg:
|
|
|
klog.Infof("Processing SkMsg program: %s", programSpec.SectionName)
|
|
|
@@ -1177,27 +1139,115 @@ func (t *Tracer) DelKProcInfo(pid uint32) error {
|
|
|
return err
|
|
|
}
|
|
|
|
|
|
-const skMsgCgroupPath = "/sys/fs/cgroup/ebpf-sockops"
|
|
|
+func (t *Tracer) EnsureL4HeaderCgroup(cg *cgroup.Cgroup) (string, error) {
|
|
|
+ if cg == nil {
|
|
|
+ return "", fmt.Errorf("business cgroup is nil")
|
|
|
+ }
|
|
|
+ if cg.Version != cgroup.V2 {
|
|
|
+ return "", fmt.Errorf("l4 header cgroup programs require cgroup v2, got %s", cg.Id)
|
|
|
+ }
|
|
|
+ cgroupPath := cg.FileSystemPath()
|
|
|
+ if cgroupPath == "" {
|
|
|
+ return "", fmt.Errorf("business cgroup path is empty for %s", cg.Id)
|
|
|
+ }
|
|
|
+
|
|
|
+ t.cgroupLinksMu.Lock()
|
|
|
+ if state, ok := t.cgroupLinks[cgroupPath]; ok {
|
|
|
+ state.refCount++
|
|
|
+ t.cgroupLinksMu.Unlock()
|
|
|
+ klog.Infof("[cgroup] reusing business cgroup %s for l4-header, ref=%d", cgroupPath, state.refCount)
|
|
|
+ return cgroupPath, nil
|
|
|
+ }
|
|
|
+ t.cgroupLinksMu.Unlock()
|
|
|
|
|
|
-// AddPidToCgroup 将指定 PID 追加到 sk_msg 所用的 cgroup 中,使其 socket 能被 sockops/sk_msg 程序拦截。
|
|
|
-func (t *Tracer) AddPidToCgroup(pid uint32) error {
|
|
|
- pidStr := fmt.Sprint(pid)
|
|
|
- if err := os.WriteFile(filepath.Join(skMsgCgroupPath, "cgroup.procs"), []byte(pidStr), 0644); err != nil {
|
|
|
- return fmt.Errorf("failed to add pid %d to cgroup %s: %w", pid, skMsgCgroupPath, err)
|
|
|
+ sockopsProg, exists := t.collection.Programs["sockops_cb"]
|
|
|
+ if !exists {
|
|
|
+ return "", fmt.Errorf("sockops_cb program not found")
|
|
|
}
|
|
|
- klog.Infof("[cgroup] added pid %d to %s", pid, skMsgCgroupPath)
|
|
|
- return nil
|
|
|
+ cgroupSkbProg, exists := t.collection.Programs["http_request_handler"]
|
|
|
+ if !exists {
|
|
|
+ return "", fmt.Errorf("http_request_handler program not found")
|
|
|
+ }
|
|
|
+
|
|
|
+ var links []link.Link
|
|
|
+ sockopsLink, err := link.AttachCgroup(link.CgroupOptions{
|
|
|
+ Path: cgroupPath,
|
|
|
+ Program: sockopsProg,
|
|
|
+ Attach: ebpf.AttachCGroupSockOps,
|
|
|
+ })
|
|
|
+ if err != nil {
|
|
|
+ return "", fmt.Errorf("attach sockops program to %s: %w", cgroupPath, err)
|
|
|
+ }
|
|
|
+ links = append(links, sockopsLink)
|
|
|
+
|
|
|
+ cgroupSkbLink, err := link.AttachCgroup(link.CgroupOptions{
|
|
|
+ Path: cgroupPath,
|
|
|
+ Program: cgroupSkbProg,
|
|
|
+ Attach: ebpf.AttachCGroupInetEgress,
|
|
|
+ })
|
|
|
+ if err != nil {
|
|
|
+ _ = sockopsLink.Close()
|
|
|
+ return "", fmt.Errorf("attach cgroup/skb program to %s: %w", cgroupPath, err)
|
|
|
+ }
|
|
|
+ links = append(links, cgroupSkbLink)
|
|
|
+
|
|
|
+ t.cgroupLinksMu.Lock()
|
|
|
+ if state, ok := t.cgroupLinks[cgroupPath]; ok {
|
|
|
+ state.refCount++
|
|
|
+ t.cgroupLinksMu.Unlock()
|
|
|
+ for _, l := range links {
|
|
|
+ _ = l.Close()
|
|
|
+ }
|
|
|
+ klog.Infof("[cgroup] concurrent reuse of business cgroup %s for l4-header, ref=%d", cgroupPath, state.refCount)
|
|
|
+ return cgroupPath, nil
|
|
|
+ }
|
|
|
+ t.cgroupLinks[cgroupPath] = &cgroupLinkState{
|
|
|
+ refCount: 1,
|
|
|
+ links: links,
|
|
|
+ }
|
|
|
+ t.cgroupLinksMu.Unlock()
|
|
|
+
|
|
|
+ klog.Infof("[cgroup] attached l4-header programs to business cgroup %s", cgroupPath)
|
|
|
+ return cgroupPath, nil
|
|
|
}
|
|
|
|
|
|
-// RemovePidFromCgroup 将指定 PID 移回根 cgroup,使其脱离 sk_msg 拦截。
|
|
|
-func (t *Tracer) RemovePidFromCgroup(pid uint32) error {
|
|
|
- rootCgroup := "/sys/fs/cgroup/cgroup.procs"
|
|
|
- pidStr := fmt.Sprint(pid)
|
|
|
- if err := os.WriteFile(rootCgroup, []byte(pidStr), 0644); err != nil {
|
|
|
- return fmt.Errorf("failed to move pid %d to root cgroup: %w", pid, err)
|
|
|
+func (t *Tracer) ReleaseL4HeaderCgroup(cgroupPath string) error {
|
|
|
+ if cgroupPath == "" {
|
|
|
+ return nil
|
|
|
}
|
|
|
- klog.Infof("[cgroup] removed pid %d from %s", pid, skMsgCgroupPath)
|
|
|
- return nil
|
|
|
+
|
|
|
+ var links []link.Link
|
|
|
+ t.cgroupLinksMu.Lock()
|
|
|
+ state, ok := t.cgroupLinks[cgroupPath]
|
|
|
+ if !ok {
|
|
|
+ t.cgroupLinksMu.Unlock()
|
|
|
+ return nil
|
|
|
+ }
|
|
|
+ state.refCount--
|
|
|
+ if state.refCount > 0 {
|
|
|
+ ref := state.refCount
|
|
|
+ t.cgroupLinksMu.Unlock()
|
|
|
+ klog.Infof("[cgroup] keep business cgroup %s for l4-header, ref=%d", cgroupPath, ref)
|
|
|
+ return nil
|
|
|
+ }
|
|
|
+ links = state.links
|
|
|
+ delete(t.cgroupLinks, cgroupPath)
|
|
|
+ t.cgroupLinksMu.Unlock()
|
|
|
+
|
|
|
+ var lastErr error
|
|
|
+ for _, l := range links {
|
|
|
+ if l == nil {
|
|
|
+ continue
|
|
|
+ }
|
|
|
+ if err := l.Close(); err != nil {
|
|
|
+ lastErr = err
|
|
|
+ klog.WithError(err).Errorf("[cgroup] close business cgroup link %s", cgroupPath)
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if lastErr == nil {
|
|
|
+ klog.Infof("[cgroup] detached l4-header programs from business cgroup %s", cgroupPath)
|
|
|
+ }
|
|
|
+ return lastErr
|
|
|
}
|
|
|
|
|
|
// TODO check language
|