瀏覽代碼

Merge branch 'dev-newcoroot' into dev-newnet

rock 1 年之前
父節點
當前提交
e6636e797d

+ 1 - 1
Makefile2

@@ -1,7 +1,7 @@
 FILTER=
 PARAMS=
 ifeq ($(debug),1)
-PARAMS+=debug=-D_DEBUG_MODE
+PARAMS+=debug=1
 endif
 
 ifdef pid

+ 2 - 1
build.sh

@@ -2,6 +2,7 @@
 rm ./euspace
 make -f Makefile2 all debug=1 pid=1121
 
+
 # pid=`ps aux | grep ebpfdemo81 | grep -v grep | awk '{print $2}'`
 # echo $pid
 # TRACES_ENDPOINT=http://10.0.6.103:8099/docp/api/v2/data/receive BIN_TYPE=go SEND=1 FILTER_PID=$pid WHITE_LIST=".*HandleFunc|.*main.*|.*serverHandler.*|.*ServeHTTP.*" ./euspace --listen="0.0.0.0:8123"
@@ -28,4 +29,4 @@ make -f Makefile2 all debug=1 pid=1121
 
 pid=`pidof java`
 echo $pid
-TRACES_ENDPOINT=http://10.0.6.103:8099/docp/api/v2/data/receive BIN_TYPE=java DBG_PATH="" SEND=1 FILTER_PID=$pid WHITE_LIST="handle*|addw.*" ./euspace  --listen="0.0.0.0:8124"
+TRACES_ENDPOINT=http://10.0.6.103:8099/docp/api/v2/data/receive BIN_TYPE=java DBG_PATH="" SEND=1 FILTER_PID=$pid WHITE_LIST="handle*|addw.*" ./euspace  --listen="0.0.0.0:8124"

+ 5 - 2
cgroup/cgroup.go

@@ -105,7 +105,10 @@ func NewFromProcessCgroupFile(filePath string) (*Cgroup, error) {
 			cg.subsystems[cgType] = path.Join(baseCgroupPath, parts[2])
 		}
 	}
-	if p := cg.subsystems["cpu"]; p != "" {
+	if p := cg.subsystems["name=systemd"]; p != "" {
+		cg.Id = p
+		cg.Version = V1
+	} else if p = cg.subsystems["cpu"]; p != "" {
 		cg.Id = p
 		cg.Version = V1
 	} else {
@@ -164,7 +167,7 @@ func containerByCgroup(path string) (ContainerType, string, error) {
 		if matches == nil {
 			return ContainerTypeUnknown, "", fmt.Errorf("invalid systemd cgroup %s", path)
 		}
-		return ContainerTypeSystemdService, matches[1], nil
+		return ContainerTypeSystemdService, strings.Replace(matches[1], "\\x2d", "-", -1), nil
 	}
 	return ContainerTypeUnknown, "", fmt.Errorf("unknown container: %s", path)
 }

+ 8 - 1
cgroup/cgroup_test.go

@@ -55,6 +55,13 @@ func TestNewFromProcessCgroupFile(t *testing.T) {
 	assert.Equal(t, "73051af271105c07e1f493b34856a77e665e3b0b4fc72f76c807dfbffeb881bd", cg.ContainerId)
 	assert.Equal(t, ContainerTypeDocker, cg.ContainerType)
 
+	cg, err = NewFromProcessCgroupFile(path.Join("fixtures/proc/600/cgroup"))
+	assert.Nil(t, err)
+	assert.Equal(t, V1, cg.Version)
+	assert.Equal(t, "/system.slice/springboot.service", cg.Id)
+	assert.Equal(t, "/system.slice/springboot.service", cg.ContainerId)
+	assert.Equal(t, ContainerTypeSystemdService, cg.ContainerType)
+
 	baseCgroupPath = "/kubepods.slice/kubepods-besteffort.slice/kubepods-besteffort-podc83d0428_58af_41eb_8dba_b9e6eddffe7b.slice/docker-0e612005fd07e7f47e2cd07df99a2b4e909446814d71d0b5e4efc7159dd51252.scope"
 	defer func() {
 		baseCgroupPath = ""
@@ -112,7 +119,7 @@ func TestContainerByCgroup(t *testing.T) {
 
 	typ, id, err = containerByCgroup("/system.slice/system-serial\\x2dgetty.slice")
 	as.Equal(typ, ContainerTypeSystemdService)
-	as.Equal("/system.slice/system-serial\\x2dgetty.slice", id)
+	as.Equal("/system.slice/system-serial-getty.slice", id)
 	as.Nil(err)
 
 	typ, id, err = containerByCgroup("/runtime.slice/kubelet.service")

+ 21 - 23
cgroup/cpu.go

@@ -2,7 +2,7 @@ package cgroup
 
 import (
 	"fmt"
-	"io/ioutil"
+	"os"
 	"path"
 	"strconv"
 	"strings"
@@ -57,28 +57,26 @@ func (cg Cgroup) cpuStatV2() (*CPUStat, error) {
 		UsageSeconds:         float64(vars["usage_usec"]) / 1e6,
 		ThrottledTimeSeconds: float64(vars["throttled_usec"]) / 1e6,
 	}
-	payload, err := ioutil.ReadFile(path.Join(cgRoot, cg.subsystems[""], "cpu.max"))
-	if err != nil {
-		return nil, err
-	}
-	data := strings.TrimSpace(string(payload))
-	parts := strings.Fields(data)
-	if len(parts) != 2 {
-		return nil, fmt.Errorf("invalid cpu.max payload: %s", data)
-	}
-	if parts[0] == "max" { //no limit
-		return res, nil
-	}
-	quotaUs, err := strconv.ParseUint(parts[0], 10, 64)
-	if err != nil {
-		return nil, fmt.Errorf("invalid quota value in cpu.max: %s", parts[0])
-	}
-	periodUs, err := strconv.ParseUint(parts[1], 10, 64)
-	if err != nil {
-		return nil, fmt.Errorf("invalid period value in cpu.max: %s", parts[1])
-	}
-	if periodUs > 0 {
-		res.LimitCores = float64(quotaUs) / float64(periodUs)
+	if payload, err := os.ReadFile(path.Join(cgRoot, cg.subsystems[""], "cpu.max")); err == nil {
+		data := strings.TrimSpace(string(payload))
+		parts := strings.Fields(data)
+		if len(parts) != 2 {
+			return nil, fmt.Errorf("invalid cpu.max payload: %s", data)
+		}
+		if parts[0] == "max" { //no limit
+			return res, nil
+		}
+		quotaUs, err := strconv.ParseUint(parts[0], 10, 64)
+		if err != nil {
+			return nil, fmt.Errorf("invalid quota value in cpu.max: %s", parts[0])
+		}
+		periodUs, err := strconv.ParseUint(parts[1], 10, 64)
+		if err != nil {
+			return nil, fmt.Errorf("invalid period value in cpu.max: %s", parts[1])
+		}
+		if periodUs > 0 {
+			res.LimitCores = float64(quotaUs) / float64(periodUs)
+		}
 	}
 	return res, nil
 }

+ 11 - 0
cgroup/fixtures/proc/600/cgroup

@@ -0,0 +1,11 @@
+11:freezer:/
+10:blkio:/init.scope
+9:hugetlb:/
+8:memory:/init.scope
+7:pids:/init.scope
+6:devices:/init.scope
+5:rdma:/
+4:cpuset:/
+3:net_cls,net_prio:/
+2:cpu,cpuacct:/
+1:name=systemd:/system.slice/springboot.service

+ 1 - 5
cgroup/memory.go

@@ -47,17 +47,13 @@ func (cg *Cgroup) memoryStatV1() (*MemoryStat, error) {
 }
 
 func (cg *Cgroup) memoryStatV2() (*MemoryStat, error) {
-	current, err := readUintFromFile(path.Join(cgRoot, cg.subsystems[""], "memory.current"))
-	if err != nil {
-		return nil, err
-	}
 	vars, err := readVariablesFromFile(path.Join(cgRoot, cg.subsystems[""], "memory.stat"))
 	if err != nil {
 		return nil, err
 	}
 	limit, _ := readUintFromFile(path.Join(cgRoot, cg.subsystems[""], "memory.max"))
 	return &MemoryStat{
-		RSS:   current - vars["file"],
+		RSS:   vars["anon"] + vars["file_mapped"],
 		Cache: vars["file"],
 		Limit: limit,
 	}, nil

+ 2 - 2
cgroup/memory_test.go

@@ -25,14 +25,14 @@ func TestCgroup_MemoryStat(t *testing.T) {
 	cg, _ = NewFromProcessCgroupFile(path.Join("fixtures/proc/400/cgroup"))
 	stat, err = cg.MemoryStat()
 	assert.Nil(t, err)
-	assert.Equal(t, uint64(48648192-1044480), stat.RSS)
+	assert.Equal(t, uint64(44892160+0), stat.RSS)
 	assert.Equal(t, uint64(1044480), stat.Cache)
 	assert.Equal(t, uint64(0), stat.Limit)
 
 	cg, _ = NewFromProcessCgroupFile(path.Join("fixtures/proc/500/cgroup"))
 	stat, err = cg.MemoryStat()
 	assert.Nil(t, err)
-	assert.Equal(t, uint64(131047424-50835456), stat.RSS)
+	assert.Equal(t, uint64(75247616+4038656), stat.RSS)
 	assert.Equal(t, uint64(50835456), stat.Cache)
 	assert.Equal(t, uint64(4294967296), stat.Limit)
 

+ 4 - 4
cgroup/utils.go

@@ -1,7 +1,7 @@
 package cgroup
 
 import (
-	"io/ioutil"
+	"os"
 	"strconv"
 	"strings"
 
@@ -9,7 +9,7 @@ import (
 )
 
 func readVariablesFromFile(filePath string) (map[string]uint64, error) {
-	data, err := ioutil.ReadFile(filePath)
+	data, err := os.ReadFile(filePath)
 	if err != nil {
 		return nil, err
 	}
@@ -29,7 +29,7 @@ func readVariablesFromFile(filePath string) (map[string]uint64, error) {
 }
 
 func readIntFromFile(filePath string) (int64, error) {
-	data, err := ioutil.ReadFile(filePath)
+	data, err := os.ReadFile(filePath)
 	if err != nil {
 		return 0, err
 	}
@@ -37,7 +37,7 @@ func readIntFromFile(filePath string) (int64, error) {
 }
 
 func readUintFromFile(filePath string) (uint64, error) {
-	data, err := ioutil.ReadFile(filePath)
+	data, err := os.ReadFile(filePath)
 	if err != nil {
 		return 0, err
 	}

+ 9 - 7
common/net.go

@@ -17,14 +17,16 @@ var (
 )
 
 func init() {
-	if flags.ExternalNetworksWhitelist != nil {
-		for _, prefix := range *flags.ExternalNetworksWhitelist {
-			p, err := netaddr.ParseIPPrefix(prefix)
-			if err != nil {
-				klog.Fatalf("invalid network %s: %s", prefix, err)
-			}
-			ConnectionFilter.WhitelistPrefix(p)
+	klog.Infoln("whitelisted public IPs:", *flags.ExternalNetworksWhitelist)
+	for _, prefix := range *flags.ExternalNetworksWhitelist {
+		if prefix == "" {
+			continue
 		}
+		p, err := netaddr.ParseIPPrefix(prefix)
+		if err != nil {
+			klog.Fatalf("invalid network %s: %s", prefix, err)
+		}
+		ConnectionFilter.WhitelistPrefix(p)
 	}
 	if r := flags.EphemeralPortRange; r != nil && *r != "" {
 		klog.Infoln("ephemeral-port-range:", *r)

+ 2 - 1
common/otel.go

@@ -9,13 +9,14 @@ var (
 	deploymentPodRegex  = regexp.MustCompile(`(/k8s/[a-z0-9-]+/[a-z0-9-]+)-[0-9a-f]{1,10}-[bcdfghjklmnpqrstvwxz2456789]{5}/.+`)
 	daemonsetPodRegex   = regexp.MustCompile(`(/k8s/[a-z0-9-]+/[a-z0-9-]+)-[bcdfghjklmnpqrstvwxz2456789]{5}/.+`)
 	statefulsetPodRegex = regexp.MustCompile(`(/k8s/[a-z0-9-]+/[a-z0-9-]+)-\d+/.+`)
+	cronjobPodRegex     = regexp.MustCompile(`(/k8s-cronjob/[a-z0-9-]+/[a-z0-9-]+)/.+`)
 )
 
 func ContainerIdToOtelServiceName(containerId string) string {
 	if !strings.HasPrefix(containerId, "/k8s/") {
 		return containerId
 	}
-	for _, r := range []*regexp.Regexp{deploymentPodRegex, daemonsetPodRegex, statefulsetPodRegex} {
+	for _, r := range []*regexp.Regexp{deploymentPodRegex, daemonsetPodRegex, statefulsetPodRegex, cronjobPodRegex} {
 		if g := r.FindStringSubmatch(containerId); len(g) == 2 {
 			return g[1]
 		}

+ 5 - 0
containers/app.go

@@ -13,6 +13,9 @@ var (
 
 func guessApplicationType(cmdline []byte) string {
 	parts := bytes.Split(cmdline, []byte{0})
+	if len(parts) == 0 || len(parts[0]) == 0 {
+		return ""
+	}
 	cmd := bytes.TrimSuffix(bytes.Fields(parts[0])[0], []byte{':'})
 	switch {
 	case bytes.HasSuffix(cmd, []byte("memcached")):
@@ -29,6 +32,8 @@ func guessApplicationType(cmdline []byte) string {
 		return "mongos"
 	case bytes.HasSuffix(cmd, []byte("mysqld")):
 		return "mysql"
+	case bytes.HasSuffix(cmd, []byte("mariadbd")):
+		return "mysql"
 	case bytes.Contains(cmdline, []byte("org.apache.zookeeper.server.quorum.QuorumPeerMain")):
 		return "zookeeper"
 	case bytes.HasSuffix(cmd, []byte("redis-server")):

+ 340 - 69
containers/container.go

@@ -2,12 +2,14 @@ package containers
 
 import (
 	debugelf "debug/elf"
-	. "github.com/coroot/coroot-node-agent/utils/modelse"
 	"os"
+	"sort"
 	"strings"
 	"sync"
 	"time"
 
+	. "github.com/coroot/coroot-node-agent/utils/modelse"
+
 	"github.com/coroot/coroot-node-agent/cgroup"
 	"github.com/coroot/coroot-node-agent/common"
 	"github.com/coroot/coroot-node-agent/ebpftracer"
@@ -23,6 +25,7 @@ import (
 	"github.com/prometheus/client_golang/prometheus"
 	klog "github.com/sirupsen/logrus"
 	"github.com/vishvananda/netns"
+	"golang.org/x/exp/maps"
 	"inet.af/netaddr"
 )
 
@@ -46,6 +49,8 @@ type ContainerMetadata struct {
 	logDecoder  logparser.Decoder
 	hostListens map[string][]netaddr.IPPort
 	networks    map[string]ContainerNetwork
+	env         map[string]string
+	systemdTriggeredBy string
 	rootfs      string
 }
 
@@ -73,18 +78,34 @@ type AddrPair struct {
 
 type ActiveConnection struct {
 	Dest       netaddr.IPPort
+	Src        netaddr.IPPort
 	ActualDest netaddr.IPPort
 	Pid        uint32
 	Fd         uint64
 	Timestamp  uint64
 	Closed     time.Time
 
+	BytesSent     uint64
+	BytesReceived uint64
+
 	http2Parser    *l7.Http2Parser
 	postgresParser *l7.PostgresParser
 	mysqlParser    *l7.MysqlParser
 	dmParser       *l7.DmParser
 }
 
+// ActiveAccept is the per-socket record created by onAcceptOpen for an
+// accepted connection. The same pointer is stored in Container.acceptsActive
+// (keyed by address pair) and Container.acceptsByPidFd (keyed by pid/fd).
+type ActiveAccept struct {
+	// Dest is filled from the src argument of onAcceptOpen and Src from its
+	// dst argument, i.e. the two addresses are swapped relative to the event.
+	Dest      netaddr.IPPort
+	Src       netaddr.IPPort
+	Pid       uint32
+	Fd        uint64
+	Timestamp uint64
+	// Closed stays zero until onAcceptClose stamps it with time.Now().
+	Closed    time.Time
+
+	// BytesSent and BytesReceived hold the totals seen in the last traffic
+	// update; updateAcceptTrafficStats diffs new totals against them.
+	BytesSent     uint64
+	BytesReceived uint64
+}
+
 type ListenDetails struct {
 	ClosedAt time.Time
 	NsIPs    []netaddr.IP
@@ -103,6 +124,25 @@ type K8sContainer struct {
 	containerName string
 	pid           string
 }
+// ConnectionStats aggregates outbound-connection counters for one
+// destination / actual-destination pair (the key of
+// Container.connectsSuccessful).
+type ConnectionStats struct {
+	// Count is incremented once per successful connect in onConnectionOpen.
+	Count            uint64
+	// TotalTime is the sum of the durations passed to onConnectionOpen.
+	TotalTime        time.Duration
+	// Retransmissions is incremented by onRetransmission.
+	Retransmissions  uint64
+	// BytesSent / BytesReceived are running sums of the per-update deltas
+	// computed in updateConnectionTrafficStats.
+	BytesSent        uint64
+	// PerBytesSent / PerBytesReceived hold the most recent delta only and are
+	// reset to zero by Collect after being exported.
+	PerBytesSent     uint64
+	BytesReceived    uint64
+	PerBytesReceived uint64
+	// Src is the source address of the most recent successful connect.
+	Src              netaddr.IPPort
+	// ConEstTime is the duration of the most recent successful connect.
+	ConEstTime       time.Duration
+	// FirstReadTime, FirstWriteTime and NewReadTime are copied from the close
+	// event when all three are non-zero; Collect exports their differences.
+	// NOTE(review): the unit of these timestamps is not visible here — confirm.
+	FirstReadTime    uint64
+	FirstWriteTime   uint64
+	NewReadTime      uint64
+}
+
+// AcceptStats aggregates byte counters for accepted connections, keyed by
+// address pair in Container.acceptsSuccessful. Both fields are running sums of
+// the deltas computed in updateAcceptTrafficStats.
+type AcceptStats struct {
+	BytesSent     uint64
+	BytesReceived uint64
+}
 
 type Container struct {
 	id       ContainerID
@@ -122,19 +162,23 @@ type Container struct {
 	delaysLock  sync.Mutex
 
 	listens map[netaddr.IPPort]map[uint32]*ListenDetails
-	ipsByNs map[string][]netaddr.IP
 
-	connectsSuccessful map[AddrPair]int64           // dst:actual_dst -> count
-	connectsFailed     map[netaddr.IPPort]int64     // dst -> count
-	connectLastAttempt map[netaddr.IPPort]time.Time // dst -> time
+	connectsSuccessful map[AddrPair]*ConnectionStats // dst:actual_dst -> stats
+	connectsFailed     map[netaddr.IPPort]int64      // dst -> count
+	connectLastAttempt map[netaddr.IPPort]time.Time  // dst -> time
 	connectionsActive  map[AddrPair]*ActiveConnection
 	connectionsByPidFd map[PidFd]*ActiveConnection
-	retransmits        map[AddrPair]int64 // dst:actual_dst -> count
+
+	acceptsSuccessful map[AddrPair]*AcceptStats
+	acceptLastAttempt map[netaddr.IPPort]time.Time // dst -> time
+	acceptsActive     map[AddrPair]*ActiveAccept
+	acceptsByPidFd    map[PidFd]*ActiveAccept
 
 	l7Stats  L7Stats
 	dnsStats *L7Metrics
 
-	oomKills int
+	oomKills                 int
+	pythonThreadLockWaitTime time.Duration
 
 	mounts map[string]proc.MountInfo
 
@@ -144,6 +188,8 @@ type Container struct {
 	nsConntrack   *Conntrack
 	lbConntracks  []*Conntrack
 
+	registry *Registry
+
 	lock sync.RWMutex
 
 	done chan struct{}
@@ -161,7 +207,7 @@ type Container struct {
 	AppInfo AppInfo
 }
 
-func NewContainer(id ContainerID, cg *cgroup.Cgroup, md *ContainerMetadata, hostConntrack *Conntrack, pid uint32) (*Container, error) {
+func NewContainer(id ContainerID, cg *cgroup.Cgroup, md *ContainerMetadata, hostConntrack *Conntrack, pid uint32, registry *Registry) (*Container, error) {
 	netNs, err := proc.GetNetNs(pid)
 	if err != nil {
 		return nil, err
@@ -177,16 +223,19 @@ func NewContainer(id ContainerID, cg *cgroup.Cgroup, md *ContainerMetadata, host
 		delaysByPid: map[uint32]Delays{},
 
 		listens: map[netaddr.IPPort]map[uint32]*ListenDetails{},
-		ipsByNs: map[string][]netaddr.IP{},
 
-		connectsSuccessful: map[AddrPair]int64{},
+		connectsSuccessful: map[AddrPair]*ConnectionStats{},
 		connectsFailed:     map[netaddr.IPPort]int64{},
 		connectLastAttempt: map[netaddr.IPPort]time.Time{},
 		connectionsActive:  map[AddrPair]*ActiveConnection{},
 		connectionsByPidFd: map[PidFd]*ActiveConnection{},
-		retransmits:        map[AddrPair]int64{},
-		l7Stats:            L7Stats{},
-		dnsStats:           &L7Metrics{},
+		acceptsSuccessful:  map[AddrPair]*AcceptStats{},
+		acceptLastAttempt:  map[netaddr.IPPort]time.Time{},
+		acceptsActive:      map[AddrPair]*ActiveAccept{},
+		acceptsByPidFd:     map[PidFd]*ActiveAccept{},
+
+		l7Stats:  L7Stats{},
+		dnsStats: &L7Metrics{},
 
 		mounts: map[string]proc.MountInfo{},
 
@@ -196,6 +245,7 @@ func NewContainer(id ContainerID, cg *cgroup.Cgroup, md *ContainerMetadata, host
 
 		done:     make(chan struct{}),
 		traceMap: make(map[uint64]*tracing.Trace),
+		registry: registry,
 	}
 
 	for _, n := range md.networks {
@@ -211,18 +261,18 @@ func NewContainer(id ContainerID, cg *cgroup.Cgroup, md *ContainerMetadata, host
 
 	c.runLogParser("")
 
-	go func() {
-		ticker := time.NewTicker(gcInterval)
-		defer ticker.Stop()
-		for {
-			select {
-			case <-c.done:
-				return
-			case t := <-ticker.C:
-				c.gc(t)
-			}
-		}
-	}()
+	// go func() {
+	// 	ticker := time.NewTicker(gcInterval)
+	// 	defer ticker.Stop()
+	// 	for {
+	// 		select {
+	// 		case <-c.done:
+	// 			return
+	// 		case t := <-ticker.C:
+	// 			c.gc(t)
+	// 		}
+	// 	}
+	// }()
 
 	return c, nil
 }
@@ -250,11 +300,13 @@ func (c *Container) Describe(ch chan<- *prometheus.Desc) {
 }
 
 func (c *Container) Collect(ch chan<- prometheus.Metric) {
+	c.registry.updateTrafficStatsIfNecessary()
+
 	c.lock.RLock()
 	defer c.lock.RUnlock()
 
-	if c.metadata.image != "" {
-		ch <- gauge(metrics.ContainerInfo, 1, c.metadata.image)
+	if c.metadata.image != "" || c.metadata.systemdTriggeredBy != "" {
+		ch <- gauge(metrics.ContainerInfo, 1, c.metadata.image, c.metadata.systemdTriggeredBy)
 	}
 
 	ch <- counter(metrics.Restarts, float64(c.restarts))
@@ -308,7 +360,7 @@ func (c *Container) Collect(ch chan<- prometheus.Metric) {
 	}
 
 	for addr, open := range c.getListens() {
-		ch <- gauge(metrics.NetListenInfo, float64(open), addr.String())
+		ch <- gauge(metrics.NetListenInfo, float64(open), addr.String(), "")
 	}
 	for proxy, addrs := range c.getProxiedListens() {
 		for addr := range addrs {
@@ -316,14 +368,35 @@ func (c *Container) Collect(ch chan<- prometheus.Metric) {
 		}
 	}
 
-	for d, count := range c.connectsSuccessful {
-		ch <- counter(metrics.NetConnectsSuccessful, float64(count), d.src.String(), d.dst.String())
+	for d, stats := range c.connectsSuccessful {
+		ch <- counter(metrics.NetConnectionsSuccessful, float64(stats.Count), d.src.String(), d.dst.String())
+		ch <- counter(metrics.NetConnectionsTotalTime, stats.TotalTime.Seconds(), d.src.String(), d.dst.String())
+		if stats.Retransmissions > 0 {
+			ch <- counter(metrics.NetRetransmits, float64(stats.Retransmissions), d.src.String(), d.dst.String())
+		}
+		ch <- counter(metrics.NetBytesSent, float64(stats.BytesSent), d.dst.String(), d.dst.String(),stats.Src.String())
+		ch <- counter(metrics.NetBytesReceived, float64(stats.BytesReceived), d.dst.String(), d.dst.String(),stats.Src.String())
+		ch <- counter(metrics.NetBytesSentPer, float64(stats.PerBytesSent), d.dst.String(), d.dst.String(),stats.Src.String())
+		ch <- counter(metrics.NetBytesReceivedPer, float64(stats.PerBytesReceived), d.dst.String(), d.dst.String(),stats.Src.String())
+
+		ch <- counter(metrics.NetDataLatency, float64(stats.FirstReadTime-stats.FirstWriteTime), d.dst.String(), d.dst.String(),stats.Src.String())
+		ch <- counter(metrics.NetDataDuration, float64(stats.NewReadTime-stats.FirstWriteTime), d.dst.String(), d.dst.String(),stats.Src.String())
+		ch <- counter(metrics.NetEstTime, float64(stats.ConEstTime), d.dst.String(), d.dst.String(),stats.Src.String())
+		
+		klog.Infof("c.connectsSuccessful d.src=%s d.dst=%s stats.BytesSent=%d,stats.BytesReceived=%d stats.PerBytesSent=%d,stats.PerBytesReceived=%d,stats.datalatency=%d,stats.dataduration=%d,stats.estTime=%d", stats.Src.String(), d.dst.String(), stats.BytesSent, stats.BytesReceived, stats.PerBytesSent, stats.PerBytesReceived, stats.FirstReadTime-stats.FirstWriteTime, stats.NewReadTime-stats.FirstWriteTime, stats.ConEstTime)
+		stats.PerBytesReceived = 0
+		stats.PerBytesSent = 0
 	}
+
+	// for d, stats := range c.acceptsSuccessful {
+	// 	ch <- counter(metrics.NetAcceptsSuccessful, float64(0), d.src.String(), d.dst.String())
+	// 	ch <- counter(metrics.NetAcceptBytesSent, float64(stats.BytesSent), d.src.String(), d.dst.String())
+	// 	ch <- counter(metrics.NetAcceptBytesReceived, float64(stats.BytesReceived), d.src.String(), d.dst.String())
+
+	// 	klog.Infof("c.acceptsSuccessful d.src=%s d.dst=%s stats.BytesSent=%d,stats.BytesReceived=%d", d.src.String(), d.dst.String(), stats.BytesSent, stats.BytesReceived)
+	// }
 	for dst, count := range c.connectsFailed {
-		ch <- counter(metrics.NetConnectsFailed, float64(count), dst.String())
-	}
-	for d, count := range c.retransmits {
-		ch <- counter(metrics.NetRetransmits, float64(count), d.src.String(), d.dst.String())
+		ch <- counter(metrics.NetConnectionsFailed, float64(count), dst.String())
 	}
 
 	connections := map[AddrPair]int{}
@@ -345,7 +418,14 @@ func (c *Container) Collect(ch chan<- prometheus.Metric) {
 
 	appTypes := map[string]struct{}{}
 	seenJvms := map[string]bool{}
-	for pid, process := range c.processes {
+	seenDotNetApps := map[string]bool{}
+	pids := maps.Keys(c.processes)
+	sort.Slice(pids, func(i, j int) bool {
+		return pids[i] < pids[j]
+	})
+
+	for _, pid := range pids {
+		process := c.processes[pid]
 		cmdline := proc.GetCmdline(pid)
 		if len(cmdline) == 0 {
 			continue
@@ -354,6 +434,9 @@ func (c *Container) Collect(ch chan<- prometheus.Metric) {
 		if appType != "" {
 			appTypes[appType] = struct{}{}
 		}
+		if process.isGolangApp {
+			appTypes["golang"] = struct{}{}
+		}
 		switch {
 		case isJvm(cmdline):
 			jvm, jMetrics := jvmMetrics(pid)
@@ -365,12 +448,20 @@ func (c *Container) Collect(ch chan<- prometheus.Metric) {
 			}
 		case process.dotNetMonitor != nil:
 			appTypes["dotnet"] = struct{}{}
-			process.dotNetMonitor.Collect(ch)
+			appName := process.dotNetMonitor.AppName()
+			if !seenDotNetApps[appName] {
+				seenDotNetApps[appName] = true
+				process.dotNetMonitor.Collect(ch)
+			}
 		}
 	}
 	for appType := range appTypes {
 		ch <- gauge(metrics.ApplicationType, 1, appType)
 	}
+	if c.pythonThreadLockWaitTime > 0 {
+		ch <- counter(metrics.PythonThreadLockWaitTime, c.pythonThreadLockWaitTime.Seconds())
+	}
+
 	if c.dnsStats.Requests != nil {
 		c.dnsStats.Requests.Collect(ch)
 	}
@@ -384,6 +475,7 @@ func (c *Container) Collect(ch chan<- prometheus.Metric) {
 			ch <- gauge(metrics.NetLatency, rtt, ip.String())
 		}
 	}
+	c.gc(time.Now())
 }
 
 func (c *Container) onProcessStart(pid uint32) *Process {
@@ -395,7 +487,8 @@ func (c *Container) onProcessStart(pid uint32) *Process {
 		return nil
 	}
 	c.zombieAt = time.Time{}
-	p := NewProcess(pid, stats)
+	p := NewProcess(pid, stats, c.registry.tracer)
+
 	if p == nil {
 		return nil
 	}
@@ -465,6 +558,7 @@ func (c *Container) onFileOpen(pid uint32, fd uint64) {
 
 // set
 func (c *Container) onListenOpen(pid uint32, addr netaddr.IPPort, safe bool) {
+	klog.Infof("TCP listen open pid=%d id=%s addr=%s", pid, c.id, addr)
 	if common.PortFilter.ShouldBeSkipped(addr.Port()) {
 		return
 	}
@@ -487,20 +581,18 @@ func (c *Container) onListenOpen(pid uint32, addr netaddr.IPPort, safe bool) {
 			return
 		}
 		defer ns.Close()
-		nsId := ns.UniqueId()
-		ips, ok := c.ipsByNs[nsId]
-		if !ok {
-			if ips, err = proc.GetNsIps(ns); err != nil {
-				klog.Warningln(err)
-			} else {
-				c.ipsByNs[nsId] = ips
-			}
+		ips, err := proc.GetNsIps(ns)
+		if err != nil {
+			klog.Warningln(err)
+			return
 		}
+		klog.Infof("got IPs %s for %s", ips, ns.UniqueId())
 		details.NsIPs = ips
 	}
 }
 
 func (c *Container) onListenClose(pid uint32, addr netaddr.IPPort) {
+	klog.Infof("TCP listen close pid=%d id=%s addr=%s", pid, c.id, addr)
 	c.lock.Lock()
 	defer c.lock.Unlock()
 	if _, byAddr := c.listens[addr]; byAddr {
@@ -512,7 +604,57 @@ func (c *Container) onListenClose(pid uint32, addr netaddr.IPPort) {
 	}
 }
 
-func (c *Container) onConnectionOpen(pid uint32, fd uint64, src, dst netaddr.IPPort, timestamp uint64, failed bool) {
+// onAcceptOpen records an accepted connection for process pid on descriptor fd.
+//
+// The event is ignored when pid is not a known process of this container, or
+// when dst is a loopback address and the process is not in the host network
+// namespace. For a non-failed accept it makes sure an AcceptStats entry exists
+// and registers an ActiveAccept in acceptsActive and acceptsByPidFd. In every
+// non-ignored case it stamps acceptLastAttempt[dst] with the current time.
+//
+// Note that the addresses are swapped when stored: the map key and the
+// ActiveAccept use dst as "src" and src as "dst".
+//
+// NOTE(review): the duration parameter is never read in this function.
+func (c *Container) onAcceptOpen(pid uint32, fd uint64, src, dst netaddr.IPPort, timestamp uint64, failed bool, duration time.Duration) {
+	klog.Infof("accept pid=%d id=%s dstaddr=%s srcaddr=%s", pid, c.id, dst.IP(), src.IP())
+	// if common.PortFilter.ShouldBeSkipped(dst.Port()) {
+	// 	return
+	// }
+	// NOTE(review): c.processes is read here before c.lock is taken below —
+	// confirm the caller serializes access to the process map.
+	p := c.processes[pid]
+	if p == nil {
+		return
+	}
+	if dst.IP().IsLoopback() && !p.isHostNs() {
+		return
+	}
+	// actualDst, err := c.getActualDestination(p, src, dst)
+	// if err != nil {
+	// 	if !common.IsNotExist(err) {
+	// 		klog.Warningf("cannot open NetNs for pid %d: %s", pid, err)
+	// 	}
+	// 	return
+	// }
+	// switch {
+	// case actualDst == nil:
+	// 	actualDst = &dst
+	// case actualDst.IP().IsLoopback() && !p.isHostNs():
+	// 	return
+	// }
+	// if common.ConnectionFilter.ShouldBeSkipped(dst.IP(), actualDst.IP()) {
+	// 	return
+	// }
+	c.lock.Lock()
+	defer c.lock.Unlock()
+	if !failed {
+		key := AddrPair{src: dst, dst: src}
+		// Only guarantees that an entry exists; no field of stats is updated here.
+		stats := c.acceptsSuccessful[key]
+		if stats == nil {
+			stats = &AcceptStats{}
+			c.acceptsSuccessful[key] = stats
+		}
+		acceptCon := &ActiveAccept{
+			Dest:      src,
+			Src:       dst,
+			Pid:       pid,
+			Fd:        fd,
+			Timestamp: timestamp,
+		}
+		// Same address pair as key above.
+		c.acceptsActive[AddrPair{src: dst, dst: src}] = acceptCon
+		c.acceptsByPidFd[PidFd{Pid: pid, Fd: fd}] = acceptCon
+	}
+	c.acceptLastAttempt[dst] = time.Now()
+}
+
+func (c *Container) onConnectionOpen(pid uint32, fd uint64, src, dst netaddr.IPPort, timestamp uint64, failed bool, duration time.Duration) {
 	if common.PortFilter.ShouldBeSkipped(dst.Port()) {
 		return
 	}
@@ -544,9 +686,19 @@ func (c *Container) onConnectionOpen(pid uint32, fd uint64, src, dst netaddr.IPP
 	if failed {
 		c.connectsFailed[dst]++
 	} else {
-		c.connectsSuccessful[AddrPair{src: dst, dst: *actualDst}]++
+		key := AddrPair{src: dst, dst: *actualDst}
+		stats := c.connectsSuccessful[key]
+		if stats == nil {
+			stats = &ConnectionStats{}
+			c.connectsSuccessful[key] = stats
+		}
+		stats.Count++
+		stats.TotalTime += duration
+		stats.Src = src
+		stats.ConEstTime = duration
 		connection := &ActiveConnection{
 			Dest:       dst,
+			Src:        src,
 			ActualDest: *actualDst,
 			Pid:        pid,
 			Fd:         fd,
@@ -588,15 +740,93 @@ func (c *Container) getActualDestination(p *Process, src, dst netaddr.IPPort) (*
 	return nil, nil
 }
 
-func (c *Container) onConnectionClose(srcDst AddrPair) bool {
+// onConnectionClose handles a close event for the outbound connection
+// identified by e.Pid / e.Fd.
+//
+// If the connection is known and not yet marked closed, the final traffic
+// counters carried by the event (when present) are folded into the
+// per-destination stats and the connection is stamped with the close time.
+// Unknown or already-closed connections are ignored.
+func (c *Container) onConnectionClose(e ebpftracer.Event) {
+	// Hold the lock for the whole operation. Previously the lock was released
+	// between the lookup and the update, and conn.Closed was read and written
+	// with no lock held, which raced with the other code paths that touch the
+	// same ActiveConnection under c.lock (e.g. updateTrafficStats) and allowed
+	// two close events to both pass the IsZero check.
+	c.lock.Lock()
+	defer c.lock.Unlock()
+	conn := c.connectionsByPidFd[PidFd{Pid: e.Pid, Fd: e.Fd}]
+	if conn == nil || !conn.Closed.IsZero() {
+		return
+	}
+	if e.TrafficStats != nil {
+		// updateConnectionTrafficStats expects c.lock to be held by the caller.
+		c.updateConnectionTrafficStats(conn, e.TrafficStats.BytesSent, e.TrafficStats.BytesReceived, e.FirstReadTime, e.FirstWriteTime, e.NewReadTime)
+	}
+	conn.Closed = time.Now()
+}
+
+// onAcceptClose handles a close event for the accepted connection identified
+// by e.Pid / e.Fd.
+//
+// If the connection is known and not yet marked closed, the final traffic
+// counters carried by the event (when present) are folded into the accept
+// stats and the connection is stamped with the close time. Unknown or
+// already-closed connections are ignored.
+func (c *Container) onAcceptClose(e ebpftracer.Event) {
+	// Hold the lock for the whole operation. Previously conn.Closed was read
+	// and written with no lock held while gc reads the same field, and the
+	// lock was dropped between the lookup and the stats update.
+	c.lock.Lock()
+	defer c.lock.Unlock()
+	conn := c.acceptsByPidFd[PidFd{Pid: e.Pid, Fd: e.Fd}]
+	if conn == nil || !conn.Closed.IsZero() {
+		return
+	}
+	if e.TrafficStats != nil {
+		// updateAcceptTrafficStats expects c.lock to be held by the caller.
+		c.updateAcceptTrafficStats(conn, e.TrafficStats.BytesSent, e.TrafficStats.BytesReceived)
+	}
+	conn.Closed = time.Now()
+}
+
+// updateTrafficStats applies a periodic traffic update to the outbound
+// connection identified by u.Pid / u.FD. A nil update is ignored, and so is an
+// update for an unknown connection (the lookup yields nil, which
+// updateConnectionTrafficStats rejects). The three timestamp arguments are
+// passed as zero, so the stored read/write timestamps are left untouched.
+func (c *Container) updateTrafficStats(u *TrafficStatsUpdate) {
+	if u == nil {
+		return
+	}
 	c.lock.Lock()
 	defer c.lock.Unlock()
-	conn := c.connectionsActive[srcDst]
-	if conn == nil {
-		return false
+	c.updateConnectionTrafficStats(c.connectionsByPidFd[PidFd{Pid: u.Pid, Fd: u.FD}], u.BytesSent, u.BytesReceived, 0, 0, 0)
+}
+
+// updateConnectionTrafficStats folds new byte totals for connection ac into the
+// ConnectionStats entry keyed by ac.Dest / ac.ActualDest, creating the entry if
+// needed. sent and received are totals for the socket; only the increase over
+// the totals remembered on ac is added. The three timestamps are stored only
+// when all of them are non-zero. A nil ac is ignored.
+//
+// The function does not lock; the visible callers hold c.lock around it.
+func (c *Container) updateConnectionTrafficStats(ac *ActiveConnection, sent, received, firstreadtime, firstwritetime, newreadtime uint64) {
+	if ac == nil {
+		return
 	}
-	conn.Closed = time.Now()
-	return true
+	key := AddrPair{src: ac.Dest, dst: ac.ActualDest}
+	stats := c.connectsSuccessful[key]
+	if stats == nil {
+		stats = &ConnectionStats{}
+		c.connectsSuccessful[key] = stats
+	}
+	// NOTE(review): PerBytesSent / PerBytesReceived are assigned, not
+	// accumulated, so only the latest delta survives until Collect resets
+	// them — confirm that this is intended when several updates (or several
+	// connections sharing this key) arrive between two collections.
+	if sent > ac.BytesSent {
+		stats.BytesSent += sent - ac.BytesSent
+		stats.PerBytesSent = sent - ac.BytesSent
+	}
+	if received > ac.BytesReceived {
+		stats.BytesReceived += received - ac.BytesReceived
+		stats.PerBytesReceived = received - ac.BytesReceived
+	}
+	if firstreadtime != 0 && firstwritetime != 0 && newreadtime != 0 {
+		stats.FirstReadTime = firstreadtime
+		stats.FirstWriteTime = firstwritetime
+		stats.NewReadTime = newreadtime
+	}
+	// Remember the totals so the next call can compute a delta.
+	ac.BytesSent = sent
+	ac.BytesReceived = received
+}
+
+// updateAcceptTrafficStats folds new byte totals for accepted connection ac
+// into the AcceptStats entry keyed by ac.Src / ac.Dest, creating the entry if
+// needed. Only the increase over the totals remembered on ac is added. A nil
+// ac is ignored.
+//
+// The function does not lock; the visible caller holds c.lock around it.
+func (c *Container) updateAcceptTrafficStats(ac *ActiveAccept, sent, received uint64) {
+	if ac == nil {
+		return
+	}
+	// NOTE(review): the log label names onConnectionClose although this is the
+	// accept path, and it is emitted at info level on every update — looks
+	// like leftover debugging output; confirm before keeping.
+	klog.Infoln("TCP onConnectionClose5", ac.BytesSent, ac.BytesReceived, ac)
+	key := AddrPair{src: ac.Src, dst: ac.Dest}
+	stats := c.acceptsSuccessful[key]
+	if stats == nil {
+		stats = &AcceptStats{}
+		c.acceptsSuccessful[key] = stats
+	}
+	if sent > ac.BytesSent {
+		stats.BytesSent += sent - ac.BytesSent
+	}
+	if received > ac.BytesReceived {
+		stats.BytesReceived += received - ac.BytesReceived
+	}
+	// Remember the totals so the next call can compute a delta.
+	ac.BytesSent = sent
+	ac.BytesReceived = received
 }
 
 func (c *Container) onDNSRequest(r *l7.RequestData) (map[netaddr.IP]string, string, string) {
@@ -612,10 +842,10 @@ func (c *Container) onDNSRequest(r *l7.RequestData) (map[netaddr.IP]string, stri
 		dnsReq := L7Requests[l7.ProtocolDNS]
 		c.dnsStats.Requests = prometheus.NewCounterVec(
 			prometheus.CounterOpts{Name: dnsReq.Name, Help: dnsReq.Help},
-			[]string{"request_type", "status"},
+			[]string{"request_type", "domain", "status"},
 		)
 	}
-	if m, _ := c.dnsStats.Requests.GetMetricWithLabelValues(t, status); m != nil {
+	if m, _ := c.dnsStats.Requests.GetMetricWithLabelValues(t, fqdn, status); m != nil {
 		m.Inc()
 	}
 	if r.Duration != 0 {
@@ -705,14 +935,20 @@ func (c *Container) onL7Request(pid uint32, fd uint64, timestamp uint64, r *l7.R
 	return nil
 }
 
-func (c *Container) onRetransmit(srcDst AddrPair) bool {
+func (c *Container) onRetransmission(srcDst AddrPair) bool {
 	c.lock.Lock()
 	defer c.lock.Unlock()
 	conn, ok := c.connectionsActive[srcDst]
 	if !ok {
 		return false
 	}
-	c.retransmits[AddrPair{src: srcDst.dst, dst: conn.ActualDest}]++
+	key := AddrPair{src: srcDst.dst, dst: conn.ActualDest}
+	stats := c.connectsSuccessful[key]
+	if stats == nil {
+		stats = &ConnectionStats{}
+		c.connectsSuccessful[key] = stats
+	}
+	stats.Retransmissions++
 	return true
 }
 
@@ -874,6 +1110,12 @@ func (c *Container) ping() map[netaddr.IP]float64 {
 	}
 	targets := make([]netaddr.IP, 0, len(ips))
 	for ip := range ips {
+		if ip.IsLoopback() {
+			continue
+		}
+		if !ip.Is4() { // pinger doesn't support IPv6 yet
+			continue
+		}
 		targets = append(targets, ip)
 	}
 	rtt, err := pinger.Ping(netNs, selfNetNs, targets, pingTimeout)
@@ -944,8 +1186,8 @@ func (c *Container) runLogParser(logPath string) {
 }
 
 func (c *Container) gc(now time.Time) {
-	c.lock.Lock()
-	defer c.lock.Unlock()
+	// c.lock.Lock()
+	// defer c.lock.Unlock()
 
 	established := map[AddrPair]struct{}{}
 	establishedDst := map[netaddr.IPPort]struct{}{}
@@ -953,7 +1195,7 @@ func (c *Container) gc(now time.Time) {
 	seenNamespaces := map[string]bool{}
 	fdMap := map[uint64]struct{}{}
 	for _, p := range c.processes {
-		if seenNamespaces[p.NetNsId] {
+		if seenNamespaces[p.NetNsId()] {
 			continue
 		}
 		sockets, err := proc.GetSockets(p.Pid)
@@ -975,15 +1217,9 @@ func (c *Container) gc(now time.Time) {
 				establishedDst[s.DAddr] = struct{}{}
 			}
 		}
-
-		seenNamespaces[p.NetNsId] = true
+		seenNamespaces[p.NetNsId()] = true
 	}
 
-	for ns := range c.ipsByNs {
-		if !seenNamespaces[ns] {
-			delete(c.ipsByNs, ns)
-		}
-	}
 	c.revalidateListens(now, listens)
 	for srcDst, conn := range c.connectionsActive {
 		pidFd := PidFd{Pid: conn.Pid, Fd: conn.Fd}
@@ -1003,6 +1239,24 @@ func (c *Container) gc(now time.Time) {
 		}
 	}
 
+	for srcDst, conn := range c.acceptsActive {
+		pidFd := PidFd{Pid: conn.Pid, Fd: conn.Fd}
+		if _, ok := established[srcDst]; !ok {
+			delete(c.acceptsActive, srcDst)
+			if conn == c.acceptsByPidFd[pidFd] {
+				delete(c.acceptsByPidFd, pidFd)
+			}
+			continue
+		}
+
+		if !conn.Closed.IsZero() && now.Sub(conn.Closed) > gcInterval {
+			delete(c.acceptsActive, srcDst)
+			if conn == c.acceptsByPidFd[pidFd] {
+				delete(c.acceptsByPidFd, pidFd)
+			}
+		}
+	}
+
 	for _, conn := range c.connectionsByPidFd {
 
 		if _, ok := fdMap[conn.Fd]; !ok {
@@ -1010,6 +1264,13 @@ func (c *Container) gc(now time.Time) {
 		}
 	}
 
+	for _, conn := range c.acceptsByPidFd {
+
+		if _, ok := fdMap[conn.Fd]; !ok {
+			delete(c.acceptsByPidFd, PidFd{Pid: conn.Pid, Fd: conn.Fd})
+		}
+	}
+
 	for dst, at := range c.connectLastAttempt {
 		_, active := establishedDst[dst]
 		if !active && !at.IsZero() && now.Sub(at) > gcInterval {
@@ -1020,9 +1281,17 @@ func (c *Container) gc(now time.Time) {
 					delete(c.connectsSuccessful, d)
 				}
 			}
-			for d := range c.retransmits {
+			c.l7Stats.delete(dst)
+		}
+	}
+
+	for dst, at := range c.acceptLastAttempt {
+		_, active := establishedDst[dst]
+		if !active && !at.IsZero() && now.Sub(at) > gcInterval {
+			delete(c.acceptLastAttempt, dst)
+			for d := range c.acceptsSuccessful {
 				if d.src == dst {
-					delete(c.retransmits, d)
+					delete(c.acceptsSuccessful, d)
 				}
 			}
 			c.l7Stats.delete(dst)
@@ -1067,6 +1336,7 @@ func (c *Container) revalidateListens(now time.Time, actualListens map[netaddr.I
 		for pid := range c.processes {
 			fds, err := proc.ReadFds(pid)
 			if err != nil {
+				klog.Warningln(err)
 				continue
 			}
 			for _, fd := range fds {
@@ -1158,6 +1428,7 @@ func (c *Container) attachTlsUprobes(tracer *ebpftracer.Tracer, pid uint32) erro
 		p.uprobes = append(p.uprobes, goProbes...)
 		p.goTlsUprobesChecked = true
 	}
+
 	return nil
 }
 

+ 5 - 3
containers/container_apm.go

@@ -105,10 +105,12 @@ func (c *Container) onL7RequestApm(pid uint32, fd uint64, timestamp uint64, r *l
 	//if !c.valuableTrace(r.TraceId) {
 	//	return nil
 	//}
-
-	if r.Protocol == l7.ProtocolTrace && c.l7Attach && c.valuableTrace(r.TraceId) {
+	// klog.Infof("====ProtocolTrace+++++ start==== %d %d", pid, r.TraceId)
+	// klog.Infof("====ProtocolTrace===== start==== %d %d", r.Protocol == l7.ProtocolTrace, c.l7Attach)
+	if r.Protocol == l7.ProtocolTrace && c.valuableTrace(r.TraceId) {
+		// klog.Infof("====ProtocolTrace---- start==== %d %d", pid, r.TraceId)
 		if r.TraceStart == TRACE_STATUS {
-			klog.Infof("====ProtocolTrace start==== %d %d", pid, r.TraceId)
+			// klog.Infof("====ProtocolTrace start==== %d %d", pid, r.TraceId)
 			trace, err := c.getOrInitTrace(r.TraceId)
 			if err == nil {
 				method, path, hostIp, port := l7.ParseHttpHost(r.Payload)

+ 19 - 3
containers/crio.go

@@ -8,6 +8,7 @@ import (
 	"net"
 	"net/http"
 	"os"
+	"strings"
 	"time"
 
 	"github.com/coroot/coroot-node-agent/common"
@@ -20,7 +21,6 @@ const crioTimeout = 30 * time.Second
 
 var (
 	crioClient *http.Client
-	crioSocket = proc.HostPath("/var/run/crio/crio.sock")
 )
 
 type CrioContainerInfo struct {
@@ -37,8 +37,23 @@ type CrioVolume struct {
 }
 
 func CrioInit() error {
-	if _, err := os.Stat(crioSocket); err != nil {
-		return err
+	sockets := []string{
+		"/var/run/crio/crio.sock",
+		"/run/crio/crio.sock",
+	}
+	var crioSocket string
+	var err error
+	for _, socket := range sockets {
+		socketHostPath := proc.HostPath(socket)
+		if _, err := os.Stat(socketHostPath); err == nil {
+			crioSocket = socketHostPath
+			break
+		}
+	}
+	if err != nil {
+		return fmt.Errorf("couldn't connect to CRI-O through the following UNIX sockets: [%s]: %s",
+			strings.Join(sockets, ","), err,
+		)
 	}
 	klog.Infoln("cri-o socket:", crioSocket)
 
@@ -50,6 +65,7 @@ func CrioInit() error {
 			DisableCompression: true,
 		},
 	}
+
 	return nil
 }
 

+ 12 - 0
containers/dockerd.go

@@ -56,6 +56,7 @@ func DockerdInspect(containerID string) (*ContainerMetadata, error) {
 		volumes:     map[string]string{},
 		hostListens: map[string][]netaddr.IPPort{},
 		networks:    map[string]ContainerNetwork{},
+		env:         map[string]string{},
 		rootfs:      c.GraphDriver.Data["MergedDir"],
 	}
 	for _, m := range c.Mounts {
@@ -93,6 +94,17 @@ func DockerdInspect(containerID string) (*ContainerMetadata, error) {
 			}
 		}
 	}
+	if c.Config != nil {
+		for _, value := range c.Config.Env {
+			idx := strings.Index(value, "=")
+			if idx < 0 {
+				continue
+			}
+			k := value[:idx]
+			v := value[idx+1:]
+			res.env[k] = v
+		}
+	}
 	return res, nil
 }
 

+ 7 - 2
containers/dotnet.go

@@ -53,6 +53,7 @@ func (m *dotNetMetric) units() string {
 
 type DotNetMonitor struct {
 	pid            uint32
+	appName        string
 	cancel         context.CancelFunc
 	lastUpdate     time.Time
 	runtimeVersion string
@@ -74,8 +75,8 @@ func NewDotNetMonitor(ctx context.Context, pid uint32, appName string) *DotNetMo
 	constLabels := prometheus.Labels{"application": appName}
 
 	m := &DotNetMonitor{
-		pid: pid,
-
+		pid:                           pid,
+		appName:                       appName,
 		info:                          newGaugeVec("container_dotnet_info", "Meta information about the Common Language Runtime (CLR)", constLabels, "runtime_version"),
 		memoryAllocatedBytes:          newCounter("container_dotnet_memory_allocated_bytes_total", "The number of bytes allocated", constLabels),
 		exceptionCount:                newGauge("container_dotnet_exceptions_total", "The number of exceptions that have occurred", constLabels),
@@ -91,6 +92,10 @@ func NewDotNetMonitor(ctx context.Context, pid uint32, appName string) *DotNetMo
 	return m
 }
 
+// AppName returns the application name this monitor was created with.
+func (m *DotNetMonitor) AppName() string {
+	return m.appName
+}
+
 func (m *DotNetMonitor) Collect(ch chan<- prometheus.Metric) {
 	if m.lastUpdate.Before(time.Now().Add(-2 * dotNetEventInterval)) {
 		return

+ 103 - 72
containers/metrics.go

@@ -28,12 +28,26 @@ var metrics = struct {
 	DiskWriteOps   *prometheus.Desc
 	DiskWriteBytes *prometheus.Desc
 
-	NetListenInfo         *prometheus.Desc
-	NetConnectsSuccessful *prometheus.Desc
-	NetConnectsFailed     *prometheus.Desc
-	NetConnectionsActive  *prometheus.Desc
-	NetRetransmits        *prometheus.Desc
-	NetLatency            *prometheus.Desc
+	NetListenInfo            *prometheus.Desc
+	NetConnectionsSuccessful *prometheus.Desc
+	NetConnectionsTotalTime  *prometheus.Desc
+	NetConnectionsFailed     *prometheus.Desc
+	NetConnectionsActive     *prometheus.Desc
+	NetRetransmits           *prometheus.Desc
+	NetLatency               *prometheus.Desc
+	NetBytesSent             *prometheus.Desc
+	NetBytesReceived         *prometheus.Desc
+	NetBytesSentPer          *prometheus.Desc
+	NetBytesReceivedPer      *prometheus.Desc
+	NetDataLatency      	 *prometheus.Desc
+	NetDataDuration			 *prometheus.Desc
+	NetEstTime			 	 *prometheus.Desc
+
+
+	NetAcceptsSuccessful 	 *prometheus.Desc
+	NetAcceptsActive     	 *prometheus.Desc
+	NetAcceptBytesSent       *prometheus.Desc
+	NetAcceptBytesReceived    *prometheus.Desc
 
 	LogMessages *prometheus.Desc
 
@@ -45,79 +59,96 @@ var metrics = struct {
 	JvmGCTime            *prometheus.Desc
 	JvmSafepointTime     *prometheus.Desc
 	JvmSafepointSyncTime *prometheus.Desc
-	Ip2Fqdn              *prometheus.Desc
+
+	PythonThreadLockWaitTime *prometheus.Desc
+
+	Ip2Fqdn *prometheus.Desc
 }{
-	ContainerInfo: metric("container_info", "Meta information about the container", "image"),
-
-	Restarts: metric("container_restarts_total", "Number of times the container was restarted"),
-
-	CPULimit:      metric("container_resources_cpu_limit_cores", "CPU limit of the container"),
-	CPUUsage:      metric("container_resources_cpu_usage_seconds_total", "Total CPU time consumed by the container"),
-	CPUDelay:      metric("container_resources_cpu_delay_seconds_total", "Total time duration processes of the container have been waiting for a CPU (while being runnable)"),
-	ThrottledTime: metric("container_resources_cpu_throttled_seconds_total", "Total time duration the container has been throttled"),
-
-	MemoryLimit: metric("container_resources_memory_limit_bytes", "Memory limit of the container"),
-	MemoryRss:   metric("container_resources_memory_rss_bytes", "Amount of physical memory used by the container (doesn't include page cache)"),
-	MemoryCache: metric("container_resources_memory_cache_bytes", "Amount of page cache memory allocated by the container"),
-	OOMKills:    metric("container_oom_kills_total", "Total number of times the container was terminated by the OOM killer"),
-
-	DiskDelay:      metric("container_resources_disk_delay_seconds_total", "Total time duration processes of the container have been waiting fot I/Os to complete"),
-	DiskSize:       metric("container_resources_disk_size_bytes", "Total capacity of the volume", "mount_point", "device", "volume"),
-	DiskUsed:       metric("container_resources_disk_used_bytes", "Used capacity of the volume", "mount_point", "device", "volume"),
-	DiskReserved:   metric("container_resources_disk_reserved_bytes", "Reserved capacity of the volume", "mount_point", "device", "volume"),
-	DiskReadOps:    metric("container_resources_disk_reads_total", "Total number of reads completed successfully by the container", "mount_point", "device", "volume"),
-	DiskReadBytes:  metric("container_resources_disk_read_bytes_total", "Total number of bytes read from the disk by the container", "mount_point", "device", "volume"),
-	DiskWriteOps:   metric("container_resources_disk_writes_total", "Total number of writes completed successfully by the container", "mount_point", "device", "volume"),
-	DiskWriteBytes: metric("container_resources_disk_written_bytes_total", "Total number of bytes written to the disk by the container", "mount_point", "device", "volume"),
-
-	NetListenInfo:         metric("container_net_tcp_listen_info", "Listen address of the container", "listen_addr"),
-	NetConnectsSuccessful: metric("container_net_tcp_successful_connects_total", "Total number of successful TCP connects", "destination", "actual_destination"),
-	NetConnectsFailed:     metric("container_net_tcp_failed_connects_total", "Total number of failed TCP connects", "destination"),
-	NetConnectionsActive:  metric("container_net_tcp_active_connections", "Number of active outbound connections used by the container", "destination", "actual_destination"),
-	NetRetransmits:        metric("container_net_tcp_retransmits_total", "Total number of retransmitted TCP segments", "destination", "actual_destination"),
-	NetLatency:            metric("container_net_latency_seconds", "Round-trip time between the container and a remote IP", "destination_ip"),
-
-	LogMessages: metric("container_log_messages_total", "Number of messages grouped by the automatically extracted repeated pattern", "source", "level", "pattern_hash", "sample"),
-
-	ApplicationType: metric("container_application_type", "Type of the application running in the container (e.g. memcached, postgres, mysql)", "application_type"),
-
-	JvmInfo:              metric("container_jvm_info", "Meta information about the JVM", "jvm", "java_version"),
-	JvmHeapSize:          metric("container_jvm_heap_size_bytes", "Total heap size in bytes", "jvm"),
-	JvmHeapUsed:          metric("container_jvm_heap_used_bytes", "Used heap size in bytes", "jvm"),
-	JvmGCTime:            metric("container_jvm_gc_time_seconds", "Time spent in the given JVM garbage collector in seconds", "jvm", "gc"),
-	JvmSafepointTime:     metric("container_jvm_safepoint_time_seconds", "Time the application has been stopped for safepoint operations in seconds", "jvm"),
-	JvmSafepointSyncTime: metric("container_jvm_safepoint_sync_time_seconds", "Time spent getting to safepoints in seconds", "jvm"),
-	Ip2Fqdn:              metric("ip_to_fqdn", "Mapping IP addresses to FQDNs based on DNS requests initiated by containers", "ip", "fqdn"),
+	ContainerInfo: metric("process_info", "Meta information about the process", "image", "systemd_triggered_by"),
+
+	Restarts: metric("process_restarts_total", "Number of times the process was restarted"),
+
+	CPULimit:      metric("process_resources_cpu_limit_cores", "CPU limit of the process"),
+	CPUUsage:      metric("process_resources_cpu_usage_seconds_total", "Total CPU time consumed by the process"),
+	CPUDelay:      metric("process_resources_cpu_delay_seconds_total", "Total time duration processes of the process have been waiting for a CPU (while being runnable)"),
+	ThrottledTime: metric("process_resources_cpu_throttled_seconds_total", "Total time duration the process has been throttled"),
+
+	MemoryLimit: metric("process_resources_memory_limit_bytes", "Memory limit of the process"),
+	MemoryRss:   metric("process_resources_memory_rss_bytes", "Amount of physical memory used by the process (doesn't include page cache)"),
+	MemoryCache: metric("process_resources_memory_cache_bytes", "Amount of page cache memory allocated by the process"),
+	OOMKills:    metric("process_oom_kills_total", "Total number of times the process was terminated by the OOM killer"),
+
+	DiskDelay:      metric("process_resources_disk_delay_seconds_total", "Total time duration processes of the process have been waiting fot I/Os to complete"),
+	DiskSize:       metric("process_resources_disk_size_bytes", "Total capacity of the volume", "mount_point", "device", "volume"),
+	DiskUsed:       metric("process_resources_disk_used_bytes", "Used capacity of the volume", "mount_point", "device", "volume"),
+	DiskReserved:   metric("process_resources_disk_reserved_bytes", "Reserved capacity of the volume", "mount_point", "device", "volume"),
+	DiskReadOps:    metric("process_resources_disk_reads_total", "Total number of reads completed successfully by the process", "mount_point", "device", "volume"),
+	DiskReadBytes:  metric("process_resources_disk_read_bytes_total", "Total number of bytes read from the disk by the process", "mount_point", "device", "volume"),
+	DiskWriteOps:   metric("process_resources_disk_writes_total", "Total number of writes completed successfully by the process", "mount_point", "device", "volume"),
+	DiskWriteBytes: metric("process_resources_disk_written_bytes_total", "Total number of bytes written to the disk by the process", "mount_point", "device", "volume"),
+
+	NetListenInfo:            metric("process_net_tcp_listen_info", "Listen address of the process", "listen_addr", "proxy"),
+	NetConnectionsSuccessful: metric("process_net_tcp_successful_connects_total", "Total number of successful TCP connects", "destination", "actual_destination"),
+	NetConnectionsTotalTime:  metric("process_net_tcp_connection_time_seconds_total", "Time spent on TCP connections", "destination", "actual_destination"),
+	NetConnectionsFailed:     metric("process_net_tcp_failed_connects_total", "Total number of failed TCP connects", "destination"),
+	NetConnectionsActive:     metric("process_net_tcp_active_connections", "Number of active outbound connections used by the process", "destination", "actual_destination"),
+	NetRetransmits:           metric("process_net_tcp_retransmits_total", "Total number of retransmitted TCP segments", "destination", "actual_destination"),
+	NetLatency:               metric("process_net_latency_seconds", "Round-trip time between the process and a remote IP", "destination_ip"),
+	NetBytesSent:             metric("process_net_tcp_bytes_sent_total", "Total number of bytes sent to the peer", "destination", "actual_destination", "src"),
+	NetBytesReceived:         metric("process_net_tcp_bytes_received_total", "Total number of bytes received from the peer", "destination", "actual_destination", "src"),
+	NetBytesSentPer:          metric("process_net_tcp_bytes_sent_per", "Per number of bytes sent to the peer", "destination", "actual_destination", "src"),
+	NetBytesReceivedPer:      metric("process_net_tcp_bytes_received_per", "Per number of bytes received from the peer", "destination", "actual_destination", "src"),
+	NetAcceptsSuccessful: 	  metric("process_net_tcp_successful_accept_total", "Total number of successful TCP accepts", "destination", "actual_destination"),
+	NetAcceptBytesSent:       metric("process_net_tcp_bytes_sent_accept_total", "Total number of bytes sent to the peer", "destination", "actual_destination"),
+	NetAcceptBytesReceived:   metric("process_net_tcp_bytes_received_accept_total", "Total number of bytes received from the peer", "destination", "actual_destination"),
+	NetDataLatency:      	  metric("process_net_tcp_data_latency", "Data latency", "destination", "actual_destination", "src"),
+	NetDataDuration:      	  metric("process_net_tcp_data_duration", "Data duration", "destination", "actual_destination", "src"),
+	NetEstTime:      	  	  metric("process_net_tcp_est_time", "Established time", "destination", "actual_destination", "src"),
+	
+	LogMessages: metric("process_log_messages_total", "Number of messages grouped by the automatically extracted repeated pattern", "source", "level", "pattern_hash", "sample"),
+
+	ApplicationType: metric("process_application_type", "Type of the application running in the process (e.g. memcached, postgres, mysql)", "application_type"),
+
+	JvmInfo:              metric("process_jvm_info", "Meta information about the JVM", "jvm", "java_version"),
+	JvmHeapSize:          metric("process_jvm_heap_size_bytes", "Total heap size in bytes", "jvm"),
+	JvmHeapUsed:          metric("process_jvm_heap_used_bytes", "Used heap size in bytes", "jvm"),
+	JvmGCTime:            metric("process_jvm_gc_time_seconds", "Time spent in the given JVM garbage collector in seconds", "jvm", "gc"),
+	JvmSafepointTime:     metric("process_jvm_safepoint_time_seconds", "Time the application has been stopped for safepoint operations in seconds", "jvm"),
+	JvmSafepointSyncTime: metric("process_jvm_safepoint_sync_time_seconds", "Time spent getting to safepoints in seconds", "jvm"),
+
+	Ip2Fqdn: metric("ip_to_fqdn", "Mapping IP addresses to FQDNs based on DNS requests initiated by processs", "ip", "fqdn"),
+
+	PythonThreadLockWaitTime: metric("process_python_thread_lock_wait_time_seconds", "Time spent waiting acquiring GIL in seconds"),
 }
 
 var (
 	L7Requests = map[l7.Protocol]prometheus.CounterOpts{
-		l7.ProtocolHTTP:      {Name: "container_http_requests_total", Help: "Total number of outbound HTTP requests"},
-		l7.ProtocolPostgres:  {Name: "container_postgres_queries_total", Help: "Total number of outbound Postgres queries"},
-		l7.ProtocolRedis:     {Name: "container_redis_queries_total", Help: "Total number of outbound Redis queries"},
-		l7.ProtocolMemcached: {Name: "container_memcached_queries_total", Help: "Total number of outbound Memcached queries"},
-		l7.ProtocolMysql:     {Name: "container_mysql_queries_total", Help: "Total number of outbound Mysql queries"},
-		l7.ProtocolMongo:     {Name: "container_mongo_queries_total", Help: "Total number of outbound Mongo queries"},
-		l7.ProtocolKafka:     {Name: "container_kafka_requests_total", Help: "Total number of outbound Kafka requests"},
-		l7.ProtocolCassandra: {Name: "container_cassandra_queries_total", Help: "Total number of outbound Cassandra requests"},
-		l7.ProtocolRabbitmq:  {Name: "container_rabbitmq_messages_total", Help: "Total number of Rabbitmq messages produced or consumed by the container"},
-		l7.ProtocolNats:      {Name: "container_nats_messages_total", Help: "Total number of NATS messages produced or consumed by the container"},
-		l7.ProtocolDubbo2:    {Name: "container_dubbo_requests_total", Help: "Total number of outbound DUBBO requests"},
-		l7.ProtocolDNS:       {Name: "container_dns_requests_total", Help: "Total number of outbound DNS requests"},
-		l7.ProtocolDM:        {Name: "container_dm_queries_total", Help: "Total number of outbound DaMeng requests"},
+		l7.ProtocolHTTP:      {Name: "process_http_requests_total", Help: "Total number of outbound HTTP requests"},
+		l7.ProtocolPostgres:  {Name: "process_postgres_queries_total", Help: "Total number of outbound Postgres queries"},
+		l7.ProtocolRedis:     {Name: "process_redis_queries_total", Help: "Total number of outbound Redis queries"},
+		l7.ProtocolMemcached: {Name: "process_memcached_queries_total", Help: "Total number of outbound Memcached queries"},
+		l7.ProtocolMysql:     {Name: "process_mysql_queries_total", Help: "Total number of outbound Mysql queries"},
+		l7.ProtocolMongo:     {Name: "process_mongo_queries_total", Help: "Total number of outbound Mongo queries"},
+		l7.ProtocolKafka:     {Name: "process_kafka_requests_total", Help: "Total number of outbound Kafka requests"},
+		l7.ProtocolCassandra: {Name: "process_cassandra_queries_total", Help: "Total number of outbound Cassandra requests"},
+		l7.ProtocolRabbitmq:  {Name: "process_rabbitmq_messages_total", Help: "Total number of Rabbitmq messages produced or consumed by the process"},
+		l7.ProtocolNats:      {Name: "process_nats_messages_total", Help: "Total number of NATS messages produced or consumed by the process"},
+		l7.ProtocolDubbo2:    {Name: "process_dubbo_requests_total", Help: "Total number of outbound DUBBO requests"},
+		l7.ProtocolDNS:       {Name: "process_dns_requests_total", Help: "Total number of outbound DNS requests"},
+		l7.ProtocolDM:        {Name: "process_dm_queries_total", Help: "Total number of outbound DaMeng requests"},
 	}
 	L7Latency = map[l7.Protocol]prometheus.HistogramOpts{
-		l7.ProtocolHTTP:      {Name: "container_http_requests_duration_seconds_total", Help: "Histogram of the response time for each outbound HTTP request"},
-		l7.ProtocolPostgres:  {Name: "container_postgres_queries_duration_seconds_total", Help: "Histogram of the execution time for each outbound Postgres query"},
-		l7.ProtocolRedis:     {Name: "container_redis_queries_duration_seconds_total", Help: "Histogram of the execution time for each outbound Redis query"},
-		l7.ProtocolMemcached: {Name: "container_memcached_queries_duration_seconds_total", Help: "Histogram of the execution time for each outbound Memcached query"},
-		l7.ProtocolMysql:     {Name: "container_mysql_queries_duration_seconds_total", Help: "Histogram of the execution time for each outbound Mysql query"},
-		l7.ProtocolMongo:     {Name: "container_mongo_queries_duration_seconds_total", Help: "Histogram of the execution time for each outbound Mongo query"},
-		l7.ProtocolKafka:     {Name: "container_kafka_requests_duration_seconds_total", Help: "Histogram of the execution time for each outbound Kafka request"},
-		l7.ProtocolCassandra: {Name: "container_cassandra_queries_duration_seconds_total", Help: "Histogram of the execution time for each outbound Cassandra request"},
-		l7.ProtocolDubbo2:    {Name: "container_dubbo_requests_duration_seconds_total", Help: "Histogram of the response time for each outbound DUBBO request"},
-		l7.ProtocolDNS:       {Name: "container_dns_requests_duration_seconds_total", Help: "Histogram of the response time for each outbound DNS request"},
-		l7.ProtocolDM:        {Name: "container_dm_queries_duration_seconds_total", Help: "Histogram of the execution time for each outbound DaMeng query"},
+		l7.ProtocolHTTP:      {Name: "process_http_requests_duration_seconds_total", Help: "Histogram of the response time for each outbound HTTP request"},
+		l7.ProtocolPostgres:  {Name: "process_postgres_queries_duration_seconds_total", Help: "Histogram of the execution time for each outbound Postgres query"},
+		l7.ProtocolRedis:     {Name: "process_redis_queries_duration_seconds_total", Help: "Histogram of the execution time for each outbound Redis query"},
+		l7.ProtocolMemcached: {Name: "process_memcached_queries_duration_seconds_total", Help: "Histogram of the execution time for each outbound Memcached query"},
+		l7.ProtocolMysql:     {Name: "process_mysql_queries_duration_seconds_total", Help: "Histogram of the execution time for each outbound Mysql query"},
+		l7.ProtocolMongo:     {Name: "process_mongo_queries_duration_seconds_total", Help: "Histogram of the execution time for each outbound Mongo query"},
+		l7.ProtocolKafka:     {Name: "process_kafka_requests_duration_seconds_total", Help: "Histogram of the execution time for each outbound Kafka request"},
+		l7.ProtocolCassandra: {Name: "process_cassandra_queries_duration_seconds_total", Help: "Histogram of the execution time for each outbound Cassandra request"},
+		l7.ProtocolDubbo2:    {Name: "process_dubbo_requests_duration_seconds_total", Help: "Histogram of the response time for each outbound DUBBO request"},
+		l7.ProtocolDNS:       {Name: "process_dns_requests_duration_seconds_total", Help: "Histogram of the response time for each outbound DNS request"},
+		l7.ProtocolDM:        {Name: "process_dm_queries_duration_seconds_total", Help: "Histogram of the execution time for each outbound DaMeng query"},
 	}
 )
 

+ 47 - 14
containers/process.go

@@ -1,6 +1,7 @@
 package containers
 
 import (
+	"bytes"
 	"context"
 	"fmt"
 	"github.com/coroot/coroot-node-agent/ebpftracer/tracer/jattach"
@@ -11,22 +12,24 @@ import (
 	"strings"
 	"time"
 
-	"github.com/jpillora/backoff"
-
 	"github.com/cilium/ebpf/link"
+	"github.com/coroot/coroot-node-agent/ebpftracer"
 	"github.com/coroot/coroot-node-agent/proc"
+	"github.com/jpillora/backoff"
 	"github.com/mdlayher/taskstats"
 )
 
 type Process struct {
 	Pid       uint32
 	StartedAt time.Time
-	NetNsId   string
+
+	netNsId string
 
 	ctx        context.Context
 	cancelFunc context.CancelFunc
 
 	dotNetMonitor *DotNetMonitor
+	isGolangApp   bool
 
 	uprobes               []link.Link
 	goTlsUprobesChecked   bool
@@ -36,25 +39,33 @@ type Process struct {
 
 	codeType CodeType
 	cmdline  string
+	pythonGilChecked      bool
 }
 
-func NewProcess(pid uint32, stats *taskstats.Stats) *Process {
-	ns, err := proc.GetNetNs(pid)
-	if err != nil {
-		return nil
-	}
-	defer ns.Close()
-	p := &Process{Pid: pid, StartedAt: stats.BeginTime, NetNsId: ns.UniqueId()}
+func NewProcess(pid uint32, stats *taskstats.Stats, tracer *ebpftracer.Tracer) *Process {
+	p := &Process{Pid: pid, StartedAt: stats.BeginTime}
 	p.ctx, p.cancelFunc = context.WithCancel(context.Background())
-	go p.instrument()
+	go p.instrument(tracer)
 	return p
 }
 
+// NetNsId returns the network namespace id of the process. The id is resolved
+// through proc.GetNetNs on first use and cached in p.netNsId. When the lookup
+// fails, "" is returned and nothing is cached, so a later call tries again.
+func (p *Process) NetNsId() string {
+	if p.netNsId != "" {
+		return p.netNsId
+	}
+	handle, err := proc.GetNetNs(p.Pid)
+	if err != nil {
+		return ""
+	}
+	// The handle is only needed to read the id; its close error is ignored.
+	defer func() { _ = handle.Close() }()
+	p.netNsId = handle.UniqueId()
+	return p.netNsId
+}
+
 func (p *Process) isHostNs() bool {
-	return p.NetNsId == hostNetNsId
+	return p.NetNsId() == hostNetNsId
 }
 
-func (p *Process) instrument() {
+func (p *Process) instrument(tracer *ebpftracer.Tracer) {
 	b := backoff.Backoff{Factor: 2, Min: time.Second, Max: time.Minute}
 	for {
 		select {
@@ -66,18 +77,40 @@ func (p *Process) instrument() {
 				return
 			}
 			if dest != "/" {
+				p.instrumentPython(tracer)
 				if dotNetAppName, err := dotNetApp(p.Pid); err == nil {
 					if dotNetAppName != "" {
 						p.dotNetMonitor = NewDotNetMonitor(p.ctx, p.Pid, dotNetAppName)
 					}
-					return
 				}
+				return
 			}
 			time.Sleep(b.Duration())
 		}
 	}
 }
 
+// instrumentPython attaches the Python thread-lock uprobes to the process when
+// the first word of its command line matches pythonCmd. The check runs at most
+// once per process: pythonGilChecked is set before any inspection, so every
+// later call returns immediately.
+func (p *Process) instrumentPython(tracer *ebpftracer.Tracer) {
+	if p.pythonGilChecked {
+		return
+	}
+	p.pythonGilChecked = true
+	cmdline := proc.GetCmdline(p.Pid)
+	if len(cmdline) == 0 {
+		return
+	}
+	// Only the first NUL-separated argument is inspected.
+	parts := bytes.Split(cmdline, []byte{0})
+	// bytes.Fields yields no elements for an empty or whitespace-only argument;
+	// indexing its result unconditionally would panic with index out of range.
+	fields := bytes.Fields(parts[0])
+	if len(fields) == 0 {
+		return
+	}
+	cmd := bytes.TrimSuffix(fields[0], []byte{':'})
+	if !pythonCmd.Match(cmd) {
+		return
+	}
+	p.uprobes = append(p.uprobes, tracer.AttachPythonThreadLockProbes(p.Pid)...)
+}
+
 func (p *Process) Close() {
 	p.cancelFunc()
 	for _, u := range p.uprobes {

+ 110 - 31
containers/registry.go

@@ -3,18 +3,19 @@ package containers
 import (
 	"bytes"
 	"fmt"
+	"os"
+	"regexp"
+	"strconv"
+	"strings"
+	"sync"
+	"time"
+
 	"github.com/coroot/coroot-node-agent/kube"
 	. "github.com/coroot/coroot-node-agent/utils"
 	"github.com/coroot/coroot-node-agent/utils/enums"
 	. "github.com/coroot/coroot-node-agent/utils/modelse"
 	"github.com/coroot/coroot-node-agent/utils/try"
 	. "github.com/coroot/coroot-node-agent/utils/worker"
-	log "github.com/sirupsen/logrus"
-	"os"
-	"regexp"
-	"strings"
-	"sync"
-	"time"
 
 	"github.com/coroot/coroot-node-agent/cgroup"
 	"github.com/coroot/coroot-node-agent/common"
@@ -28,11 +29,15 @@ import (
 	"inet.af/netaddr"
 )
 
+const MinTrafficStatsUpdateInterval = 5 * time.Second
+
 var (
-	selfNetNs         = netns.None()
-	hostNetNsId       = netns.None().UniqueId()
-	agentPid          = uint32(os.Getpid())
-	containerIdRegexp = regexp.MustCompile(`[a-z0-9]{64}`)
+	selfNetNs                = netns.None()
+	hostNetNsId              = netns.None().UniqueId()
+	agentPid                 = uint32(os.Getpid())
+	containerIdRegexp        = regexp.MustCompile(`[a-z0-9]{64}`)
+	cronjobPodName           = regexp.MustCompile(`([a-z0-9-]+)-([0-9]{8})-[bcdfghjklmnpqrstvwxz2456789]{5}`)
+	cronjobPodScheduleWindow = 7 * 24 * time.Hour
 )
 
 type ProcessInfo struct {
@@ -57,9 +62,12 @@ type Registry struct {
 
 	processInfoCh chan<- ProcessInfo
 
-	whiteListRules       WhiteListMap
-	whiteLastUpdatedTime int
-	connServer           ServerWorker
+	whiteListRules          WhiteListMap
+	whiteLastUpdatedTime    int
+	connServer              ServerWorker
+	trafficStatsLastUpdated time.Time
+	trafficStatsLock        sync.Mutex
+	trafficStatsUpdateCh    chan *TrafficStatsUpdate
 	nodeInfo             *NodeInfoT
 }
 
@@ -123,8 +131,9 @@ func NewRegistry(reg prometheus.Registerer, kernelVersion string, nodeInfo *Node
 
 		processInfoCh: processInfoCh,
 
-		tracer:         ebpftracer.NewTracer(kernelVersion, *flags.DisableL7Tracing, *flags.DisableE2ETracing, *flags.DisableStackTracing),
-		whiteListRules: make(WhiteListMap),
+		tracer:               ebpftracer.NewTracer(kernelVersion, *flags.DisableL7Tracing, *flags.DisableE2ETracing, *flags.DisableStackTracing),
+		whiteListRules:       make(WhiteListMap),
+		trafficStatsUpdateCh: make(chan *TrafficStatsUpdate),
 		nodeInfo:       nodeInfo,
 	}
 	// 初始化软负载集群节点
@@ -134,13 +143,13 @@ func NewRegistry(reg prometheus.Registerer, kernelVersion string, nodeInfo *Node
 		try.Go(proxyClient.CheckEndpoints, CatchFn)
 		log.Infof("New Proxy Client success.config_server is [%s]", *flags.ConfigServer)
 	} else {
-		log.WithError(clientErr).Errorf("NewProxyClient error, Please check [export CONFIG_ENDPOINT=ip:port]")
+		klog.WithError(clientErr).Errorf("NewProxyClient error, Please check [export CONFIG_ENDPOINT=ip:port]")
 		return nil, clientErr
 	}
 
 	r.connServer, err = NewServerHTTPWorker()
 	if err != nil {
-		log.Errorf("init connServer error:%s.", err)
+		klog.Errorf("init connServer error:%s.", err)
 		return nil, err
 	}
 	if !*flags.DisableRegisterHost {
@@ -195,7 +204,7 @@ func (r *Registry) handleEvents(ch <-chan ebpftracer.Event) {
 		case now := <-gcTicker.C:
 			_, err := r.getWhiteList()
 			if err != nil {
-				log.WithError(err).Errorf("connWhiteList error")
+				klog.WithError(err).Errorf("connWhiteList error")
 			}
 			runtimeApps := make(map[uint32]AppStatusInfo)
 			for pid, c := range r.containersByPid {
@@ -266,12 +275,12 @@ func (r *Registry) handleEvents(ch <-chan ebpftracer.Event) {
 
 			activeIPs := map[netaddr.IP]struct{}{}
 			for id, c := range r.containersById {
-				if !c.Dead(now) {
-					continue
-				}
 				for dst := range c.connectLastAttempt {
 					activeIPs[dst.IP()] = struct{}{}
 				}
+				if !c.Dead(now) {
+					continue
+				}
 				klog.Infoln("deleting dead container:", id)
 				for cg, cc := range r.containersByCgroupId {
 					if cc == c {
@@ -301,6 +310,13 @@ func (r *Registry) handleEvents(ch <-chan ebpftracer.Event) {
 				}
 			}
 			r.ip2fqdnLock.Unlock()
+		case u := <-r.trafficStatsUpdateCh:
+			if u == nil {
+				continue
+			}
+			if c := r.containersByPid[u.Pid]; c != nil {
+				c.updateTrafficStats(u)
+			}
 		case e, more := <-ch:
 			if e.Pid == uint32(os.Getpid()) {
 				continue
@@ -362,10 +378,18 @@ func (r *Registry) handleEvents(ch <-chan ebpftracer.Event) {
 				} else {
 					klog.Infoln("TCP listen open from unknown container", e)
 				}
+			case ebpftracer.EventTypeAcceptOpen:
+				klog.Infoln("ebpftracer.EventTypeAcceptOpen==================", e.Pid)
+				if c := r.getOrCreateContainer(e.Pid); c != nil {
+					c.onAcceptOpen(e.Pid, e.Fd, e.SrcAddr, e.DstAddr, e.Timestamp, false, e.Duration)
+					c.eventReady()
+				} else {
+					klog.Infoln("TCP connection from unknown container", e)
+				}
 			case ebpftracer.EventTypeConnectionOpen:
 				//fmt.Println("ebpftracer.EventTypeConnectionOpen==================", e.Pid)
 				if c := r.getOrCreateContainer(e.Pid); c != nil {
-					c.onConnectionOpen(e.Pid, e.Fd, e.SrcAddr, e.DstAddr, e.Timestamp, false)
+					c.onConnectionOpen(e.Pid, e.Fd, e.SrcAddr, e.DstAddr, e.Timestamp, false, e.Duration)
 					if !c.checkEventReady() && c.buildIDs(e.Pid) {
 						c.eventReady()
 					}
@@ -391,21 +415,22 @@ func (r *Registry) handleEvents(ch <-chan ebpftracer.Event) {
 				}
 			case ebpftracer.EventTypeConnectionError:
 				if c := r.getOrCreateContainer(e.Pid); c != nil {
-					c.onConnectionOpen(e.Pid, e.Fd, e.SrcAddr, e.DstAddr, 0, true)
+					c.onConnectionOpen(e.Pid, e.Fd, e.SrcAddr, e.DstAddr, 0, true, e.Duration)
 				} else {
 					klog.Infoln("TCP connection error from unknown container", e)
 				}
 			case ebpftracer.EventTypeConnectionClose:
-				srcDst := AddrPair{src: e.SrcAddr, dst: e.DstAddr}
-				for _, c := range r.containersById {
-					if c.onConnectionClose(srcDst) {
-						break
-					}
+				if c := r.containersByPid[e.Pid]; c != nil {
+					c.onConnectionClose(e)
+				}
+			case ebpftracer.EventTypeAcceptClose:
+				if c := r.containersByPid[e.Pid]; c != nil {
+					c.onAcceptClose(e)
 				}
 			case ebpftracer.EventTypeTCPRetransmit:
 				srcDst := AddrPair{src: e.SrcAddr, dst: e.DstAddr}
 				for _, c := range r.containersById {
-					if c.onRetransmit(srcDst) {
+					if c.onRetransmission(srcDst) {
 						break
 					}
 				}
@@ -420,6 +445,7 @@ func (r *Registry) handleEvents(ch <-chan ebpftracer.Event) {
 					//fmt.Println("EventTypeL7Request", e.Pid, c.checkL7AttachReady())
 					//a, _ := json.Marshal(e.L7Request)
 					//fmt.Println("EventTypeL7Request", e.Pid, string(a))
+					fmt.Println("e---.L7Request Payload:", string(e.L7Request.Payload))
 					ip2fqdn := c.onL7RequestApm(e.Pid, e.Fd, e.Timestamp, e.L7Request)
 					r.ip2fqdnLock.Lock()
 					for ip, fqdn := range ip2fqdn {
@@ -512,8 +538,7 @@ func (r *Registry) getOrCreateContainer(pid uint32) *Container {
 		r.containersByCgroupId[cgId] = c
 		return c
 	}
-
-	c, err := NewContainer(id, cg, md, r.hostConntrack, pid)
+	c, err := NewContainer(id, cg, md, r.hostConntrack, pid, r)
 	if err != nil {
 		klog.Warningf("failed to create container pid=%d cg=%s id=%s: %s", pid, cg.Id, id, err)
 		return nil
@@ -539,6 +564,31 @@ func (r *Registry) getOrCreateContainer(pid uint32) *Container {
 	return c
 }
 
+func (r *Registry) updateTrafficStatsIfNecessary() {
+	r.trafficStatsLock.Lock()
+	defer r.trafficStatsLock.Unlock()
+
+	if time.Now().Sub(r.trafficStatsLastUpdated) < MinTrafficStatsUpdateInterval {
+		return
+	}
+	iter := r.tracer.ActiveConnectionsIterator()
+	cid := ebpftracer.ConnectionId{}
+	stats := ebpftracer.Connection{}
+	for iter.Next(&cid, &stats) {
+		r.trafficStatsUpdateCh <- &TrafficStatsUpdate{
+			Pid:           cid.PID,
+			FD:            cid.FD,
+			BytesSent:     stats.BytesSent,
+			BytesReceived: stats.BytesReceived,
+		}
+	}
+	if err := iter.Err(); err != nil {
+		klog.Warningln(err)
+	}
+	r.trafficStatsUpdateCh <- nil
+	r.trafficStatsLastUpdated = time.Now()
+}
+
 func calcId(cg *cgroup.Cgroup, md *ContainerMetadata, pid uint32) (ContainerID, map[string]string) {
 	// 卡一下防止概率性获取为bash
 	time.Sleep(1 * time.Millisecond)
@@ -578,6 +628,14 @@ func calcId(cg *cgroup.Cgroup, md *ContainerMetadata, pid uint32) (ContainerID,
 		}
 		extensionTag[PodName] = pod
 		//extensionTag[ProcessName] = name
+		if g := cronjobPodName.FindStringSubmatch(pod); len(g) == 3 {
+			now := time.Now()
+			tsMiniutes, _ := strconv.ParseUint(g[2], 10, 64)
+			scheduledAt := time.Unix(int64(tsMiniutes)*60, 0)
+			if scheduledAt.After(now.Add(-cronjobPodScheduleWindow)) && scheduledAt.Before(now.Add(cronjobPodScheduleWindow)) {
+				return ContainerID(fmt.Sprintf("/k8s-cronjob/%s/%s/%s", namespace, g[1], name)), extensionTag
+			}
+		}
 		return ContainerID(fmt.Sprintf("/k8s/%s/%s/%s", namespace, pod, name)), extensionTag
 	}
 	if taskNameParts := strings.SplitN(md.labels["com.docker.swarm.task.name"], ".", 3); len(taskNameParts) == 3 {
@@ -591,6 +649,16 @@ func calcId(cg *cgroup.Cgroup, md *ContainerMetadata, pid uint32) (ContainerID,
 		}
 		return ContainerID(fmt.Sprintf("/swarm/%s/%s/%s", namespace, service, taskNameParts[1])), extensionTag
 	}
+	if md.env != nil {
+		allocId := md.env["NOMAD_ALLOC_ID"]
+		group := md.env["NOMAD_GROUP_NAME"]
+		job := md.env["NOMAD_JOB_NAME"]
+		namespace := md.env["NOMAD_NAMESPACE"]
+		task := md.env["NOMAD_TASK_NAME"]
+		if allocId != "" && group != "" && job != "" && namespace != "" && task != "" {
+			return ContainerID(fmt.Sprintf("/nomad/%s/%s/%s/%s/%s", namespace, job, group, allocId, task)), extensionTag
+		}
+	}
 	if md.name == "" { // should be "pure" dockerd container here
 		klog.Warningln("empty dockerd container name for:", cg.ContainerId)
 		return "", extensionTag
@@ -600,6 +668,10 @@ func calcId(cg *cgroup.Cgroup, md *ContainerMetadata, pid uint32) (ContainerID,
 
 func getContainerMetadata(cg *cgroup.Cgroup) (*ContainerMetadata, error) {
 	switch cg.ContainerType {
+	case cgroup.ContainerTypeSystemdService:
+		md := &ContainerMetadata{}
+		md.systemdTriggeredBy = SystemdTriggeredBy(cg.ContainerId)
+		return md, nil
 	case cgroup.ContainerTypeDocker, cgroup.ContainerTypeContainerd, cgroup.ContainerTypeSandbox, cgroup.ContainerTypeCrio:
 	default:
 		return &ContainerMetadata{}, nil
@@ -628,3 +700,10 @@ func getContainerMetadata(cg *cgroup.Cgroup) (*ContainerMetadata, error) {
 	}
 	return nil, fmt.Errorf("failed to interact with dockerd (%s) or with containerd (%s)", dockerdErr, containerdErr)
 }
+
+type TrafficStatsUpdate struct { // one connection's byte counters, sent over Registry.trafficStatsUpdateCh
+	Pid           uint32 // filled from ebpftracer.ConnectionId.PID; handleEvents uses it to look up the container
+	FD            uint64 // filled from ebpftracer.ConnectionId.FD
+	BytesSent     uint64 // filled from ebpftracer.Connection.BytesSent
+	BytesReceived uint64 // filled from ebpftracer.Connection.BytesReceived
+}

+ 56 - 0
containers/systemd.go

@@ -0,0 +1,56 @@
+package containers
+
+import (
+	"context"
+	"os"
+	"strconv"
+	"strings"
+	"time"
+
+	"github.com/coroot/coroot-node-agent/proc"
+
+	"github.com/coreos/go-systemd/v22/dbus"
+	gdbus "github.com/godbus/dbus/v5"
+
+	"k8s.io/klog/v2"
+)
+
+var (
+	dbusConn    *dbus.Conn    // systemd bus connection assigned in init; SystemdTriggeredBy returns "" while it is nil
+	dbusTimeout = time.Second // context timeout applied to each unit-property lookup in SystemdTriggeredBy
+)
+
+// init opens the package-wide connection to systemd's private bus socket
+// (/run/systemd/private, resolved through proc.HostPath) and stores the result
+// in dbusConn. A connection failure is logged as a warning and is not fatal.
+func init() {
+	var err error
+	dbusConn, err = dbus.NewConnection(func() (*gdbus.Conn, error) {
+		c, err := gdbus.Dial("unix:path=" + proc.HostPath("/run/systemd/private"))
+		if err != nil {
+			return nil, err
+		}
+		// Authenticate as the current user via the EXTERNAL mechanism.
+		methods := []gdbus.Auth{gdbus.AuthExternal(strconv.Itoa(os.Getuid()))}
+		if err = c.Auth(methods); err != nil {
+			// Close the connection that was just dialed. dbusConn has not been
+			// assigned yet while this dialer runs, so calling dbusConn.Close()
+			// here would dereference a nil pointer and leave c open.
+			_ = c.Close()
+			return nil, err
+		}
+		return c, nil
+	})
+	if err != nil {
+		klog.Warningln("failed to connect to systemd bus:", err)
+	}
+}
+
+// SystemdTriggeredBy returns the first value of the "TriggeredBy" property of
+// a systemd unit. The unit name is the part of id after its last "/" (the
+// whole id when it contains no "/").
+//
+// It returns "" when there is no bus connection, when the property lookup
+// yields nothing (lookup errors are deliberately ignored), or when the
+// property value is not a non-empty []string. Each lookup is bounded by
+// dbusTimeout.
+func SystemdTriggeredBy(id string) string {
+	if dbusConn == nil {
+		return ""
+	}
+	// LastIndex returns -1 when there is no "/", which makes the slice start at 0.
+	unit := id[strings.LastIndex(id, "/")+1:]
+
+	ctx, cancel := context.WithTimeout(context.Background(), dbusTimeout)
+	defer cancel()
+
+	prop, _ := dbusConn.GetUnitPropertyContext(ctx, unit, "TriggeredBy")
+	if prop == nil {
+		return ""
+	}
+	triggers, _ := prop.Value.Value().([]string)
+	if len(triggers) == 0 {
+		return ""
+	}
+	return triggers[0]
+}

+ 13 - 10
ebpftracer/ebpf/ebpf.c

@@ -14,15 +14,18 @@
 //#include <bpf/bpf_endian.h>
 #include "common/bpf/bpf_endian.h"
 
-#define EVENT_TYPE_PROCESS_START	1
-#define EVENT_TYPE_PROCESS_EXIT		2
-#define EVENT_TYPE_CONNECTION_OPEN	3
-#define EVENT_TYPE_CONNECTION_CLOSE	4
-#define EVENT_TYPE_CONNECTION_ERROR	5
-#define EVENT_TYPE_LISTEN_OPEN		6
-#define EVENT_TYPE_LISTEN_CLOSE 	7
-#define EVENT_TYPE_FILE_OPEN		8
-#define EVENT_TYPE_TCP_RETRANSMIT	9
+#define EVENT_TYPE_PROCESS_START	    1
+#define EVENT_TYPE_PROCESS_EXIT		    2
+#define EVENT_TYPE_CONNECTION_OPEN	    3
+#define EVENT_TYPE_CONNECTION_CLOSE	    4
+#define EVENT_TYPE_CONNECTION_ERROR	    5
+#define EVENT_TYPE_LISTEN_OPEN		    6
+#define EVENT_TYPE_LISTEN_CLOSE 	    7
+#define EVENT_TYPE_FILE_OPEN		    8
+#define EVENT_TYPE_TCP_RETRANSMIT	    9
+#define EVENT_TYPE_PYTHON_THREAD_LOCK	11
+#define EVENT_TYPE_ACCEPT_OPEN		    13
+#define EVENT_TYPE_ACCEPT_CLOSE 	    14
 
 #define EVENT_REASON_OOM_KILL		1
 
@@ -49,7 +52,7 @@
 #include "tcp/retransmit.c"
 //#include "l7/uprobe_base_bpf.c"
 #include "l7/l7.c"
-//#include "l7/gotls.c"
+#include "l7/gotls.c"
 //#include "l7/openssl.c"
 #include "utrace/go/net/server.probe.bpf.c"
 #include "utrace/go/net/client.probe.bpf.c"

+ 0 - 3
ebpftracer/ebpf/l7/dns.c

@@ -45,9 +45,6 @@ int is_dns_response(char *buf, __u64 buf_size, __s16 *stream_id, __u32 *status)
     if (h.bits0 & DNS_OPCODE) {
        return 0;
     }
-    if (!(h.bits1 & DNS_Z)) {
-        return 0;
-    }
     h.qdcount = bpf_ntohs(h.qdcount);
     if (h.qdcount != 1) {
         return 0;

+ 4 - 4
ebpftracer/ebpf/l7/gotls.c

@@ -1,8 +1,8 @@
 // Go internal ABI specification: https://go.dev/s/regabi
 #if defined(__TARGET_ARCH_x86)
-#define GO_PARAM1(x) ((x)->ax)
-#define GO_PARAM2(x) ((x)->bx)
-#define GO_PARAM3(x) ((x)->cx)
+#define GO_PARAM1(x) ((x)->rax)
+#define GO_PARAM2(x) ((x)->rbx)
+#define GO_PARAM3(x) ((x)->rcx)
 #define GOROUTINE(x) ((x)->r14)
 #define PT_GO_REGS_PARM1(x) ((x)->rax)
 #define PT_GO_REGS_PARM2(x) ((x)->rbx)
@@ -60,7 +60,7 @@ int go_crypto_tls_read_enter(struct pt_regs *ctx) {
     __u64 goroutine_id = GOROUTINE(ctx);
     __u64 pid = pid_tgid >> 32;
     __u64 id = pid << 32 | goroutine_id | IS_TLS_READ_ID;
-    return trace_enter_read(id, fd, buf_ptr, 0, 0);
+    return trace_enter_read(id, pid, fd, buf_ptr, 0, 0);
 }
 
 SEC("uprobe/go_crypto_tls_read_exit")

+ 144 - 50
ebpftracer/ebpf/l7/l7.c

@@ -79,6 +79,10 @@ struct l7_event {
     __u32 trace_start;
     __u32 trace_end;
     __u32 event_count;
+    __u16 sport;
+    __u16 dport;
+    __u8 saddr[16];
+    __u8 daddr[16];
 	unsigned char assumed_app_id[APM_ASSUMED_APP_ID_SIZE];
 	unsigned char span_id[APM_SPAN_ID_SIZE];
 	unsigned char trace_id_from[APM_TRACE_ID_SIZE];
@@ -123,7 +127,7 @@ struct read_args {
 };
 
 struct {
-    __uint(type, BPF_MAP_TYPE_HASH);
+    __uint(type, BPF_MAP_TYPE_LRU_HASH);
     __uint(key_size, sizeof(__u64));
     __uint(value_size, sizeof(struct read_args));
     __uint(max_entries, 10240);
@@ -230,22 +234,10 @@ struct l7_user_msghdr {
 };
 
 static inline __attribute__((__always_inline__))
-void send_event(void *ctx, struct l7_event *e, __u32 pid, __u64 fd) {
-    struct sk_info sk = {};
-    sk.pid = pid;
-    sk.fd = fd;
-    __u64 *timestamp = bpf_map_lookup_elem(&connection_timestamps, &sk);
-    if (timestamp) {
-        if (*timestamp == 0) {
-//	        cw_bpf_debug("timestamp=0");
-            return;
-        }
-        e->connection_timestamp = *timestamp;
-    } else {
-        e->connection_timestamp = 0;
-    }
-    e->fd = fd;
-    e->pid = pid;
+void send_event(void *ctx, struct l7_event *e, struct connection_id cid, struct connection *conn) {
+    e->connection_timestamp = conn->timestamp;
+    e->fd = cid.fd;
+    e->pid = cid.pid;
     long error = bpf_perf_event_output(ctx, &l7_events, BPF_F_CURRENT_CPU, e, sizeof(*e));
 	if (error ==0){
 	        cw_add_event_count(e->trace_id);
@@ -253,7 +245,7 @@ void send_event(void *ctx, struct l7_event *e, __u32 pid, __u64 fd) {
 }
 
 static inline __attribute__((__always_inline__))
-__u64 read_iovec(char *l7_iovec, __u64 iovlen, __u64 ret, char *buf) {
+__u64 read_iovec(char *iovec, __u64 iovlen, __u64 ret, char *buf, __u64 *total_size) {
     struct l7_iovec iov = {};
     __u64 max = (ret) ? MIN(ret, MAX_PAYLOAD_SIZE) : MAX_PAYLOAD_SIZE;
     __u64 offset = 0;
@@ -263,21 +255,21 @@ __u64 read_iovec(char *l7_iovec, __u64 iovlen, __u64 ret, char *buf) {
         if (i >= iovlen) {
             break;
         }
-        if (bpf_probe_read(&iov, sizeof(iov), (void *)(l7_iovec+i*sizeof(iov)))) {
+        if (bpf_probe_read(&iov, sizeof(iov), (void *)(iovec+i*sizeof(iov)))) {
             return 0;
         }
         if (iov.size <= 0) {
             continue;
         }
-        size = MIN(iov.size, max-offset);
-        TRUNCATE_PAYLOAD_SIZE(size);
-        TRUNCATE_PAYLOAD_SIZE(offset);
-        if (bpf_probe_read(buf + offset, size, (void *)iov.buf)) {
-            return 0;
-        }
-        offset += size;
-        if (offset >= max) {
-            break;
+        *total_size += iov.size;
+        if (offset < max) {
+            size = MIN(iov.size, max-offset);
+            TRUNCATE_PAYLOAD_SIZE(size);
+            TRUNCATE_PAYLOAD_SIZE(offset);
+            if (bpf_probe_read(buf + offset, size, (void *)iov.buf)) {
+                return 0;
+            }
+            offset += size;
         }
     }
     return offset;
@@ -296,12 +288,13 @@ int trace_dns_enter_write(void *ctx, __u64 fd, __u16 is_tls, char *buf, __u64 si
 		return 0;
 	}
 	char* payload = buf;
+    __u64 total_size = 0;
 	if (iovlen) {
 		payload = bpf_map_lookup_elem(&iovec_buf_heap, &zero);
 		if (!payload) {
 			return 0;
 		}
-		size = read_iovec(buf, iovlen, 0, payload);
+		size = read_iovec(buf, iovlen, 0, payload, &total_size);
 	}
 	if (!size) {
 		return 0;
@@ -342,9 +335,10 @@ int trace_enter_write(void *ctx, __u64 fd, __u16 is_tls, char *buf, __u64 size,
     __u32 zero = 0;
     __u32 pid, tid;
     __u32 http_status ;
-
     pid = id >> 32;
     tid =  (__u32)id;
+    __u64 total_size = size;
+
     if (load_filter_pid() != 0 && pid != load_filter_pid()) {
         return 0;
     }
@@ -354,7 +348,8 @@ int trace_enter_write(void *ctx, __u64 fd, __u16 is_tls, char *buf, __u64 size,
         if (!payload) {
             return 0;
         }
-        size = read_iovec(buf, iovlen, 0, payload);
+        total_size = 0;
+        size = read_iovec(buf, iovlen, 0, payload, &total_size);
     }
     if (!size) {
         return 0;
@@ -370,11 +365,15 @@ int trace_enter_write(void *ctx, __u64 fd, __u16 is_tls, char *buf, __u64 size,
     req->ns = 0;
     req->payload_size = size;
     struct l7_request_key k = {};
-    k.pid = id >> 32;
+    k.pid = pid;
     k.fd = fd;
     k.is_tls = is_tls;
     k.stream_id = -1;
 
+    struct connection_id cid = {};
+    cid.pid = pid;
+    cid.fd = fd;
+
 //    cw_bpf_debug("enter-payload:%s|type:%s|FD:%d\n",payload,"type",k.fd);
 
     if (is_http_response(payload, &http_status))
@@ -431,6 +430,20 @@ int trace_enter_write(void *ctx, __u64 fd, __u16 is_tls, char *buf, __u64 size,
 	    bpf_map_delete_elem(&trace_event_count_heap, &trace_id);
 	    // 清除trace信息
 	    cw_clear_trace(pid, tid, fd);
+        cw_bpf_debug("socket accept bytes_sent cid.pid=%d, cid.fd=%d\n", cid.pid, cid.fd);
+        struct accept_connection *accept_conn = bpf_map_lookup_elem(&active_accepts, &cid);
+        if (accept_conn) {
+            cw_bpf_debug("socket accept bytes_sent after cid.pid=%d, cid.fd=%d\n", cid.pid, cid.fd);
+            cw_bpf_debug("rock enter the  accept_conn function cid.pid=%d, cid.fd=%d\n", cid.pid, cid.fd);
+            e->sport = accept_conn->sport;
+            e->dport = accept_conn->dport;
+            __builtin_memcpy(&e->saddr, &accept_conn->saddr, sizeof(e->saddr));
+            __builtin_memcpy(&e->daddr, &accept_conn->daddr, sizeof(e->daddr));
+            // __sync_fetch_and_add(&accept_conn->bytes_sent, total_size);
+            cw_bpf_debug("socket sys_exit_accept--- accept_conn daddr=%llu, daddr=%llu\n", e->saddr[10], e->saddr[11]);
+            cw_bpf_debug("socket sys_exit_accept--- accept_conn daddr=%llu, daddr=%llu\n", e->saddr[12], e->saddr[13]);
+            cw_bpf_debug("socket sys_exit_accept--- accept_conn daddr=%llu, daddr=%llu\n", e->saddr[14], e->saddr[15]);
+        }
         bpf_perf_event_output(ctx, &l7_events, BPF_F_CURRENT_CPU, e, sizeof(*e));
         // 发送事件到用户空间 end
 //        __u64 k_version = load_filter_pid();
@@ -449,9 +462,41 @@ int trace_enter_write(void *ctx, __u64 fd, __u16 is_tls, char *buf, __u64 size,
 //        cw_bpf_debug("off->task__files_offset:%x", off->task__files_offset);
 //        cw_bpf_debug("e->test_id1111:%d", ttt->test_id);
 	    cw_bpf_debug("HTTP_END");
+
+        // //TODO 4 查询 
+        // cw_bpf_debug("socket accept bytes_sent cid.pid=%d, cid.fd=%d\n", cid.pid, cid.fd);
+        // struct accept_connection *accept_conn = bpf_map_lookup_elem(&active_accepts, &cid);
+        // if (accept_conn && !is_tls) {
+        //     cw_bpf_debug("socket accept bytes_sent after cid.pid=%d, cid.fd=%d\n", cid.pid, cid.fd);
+        //     e.sport = accept_conn.sport;
+        //     e.dport = accept_conn.dport;
+        //     __builtin_memcpy(&e.saddr, &accept_conn.saddr, sizeof(e.saddr));
+        //     __builtin_memcpy(&e.daddr, &accept_conn.daddr, sizeof(e.daddr));
+        //     // __sync_fetch_and_add(&accept_conn->bytes_sent, total_size);
+            
+        // }
+        return 0;
+    }
+
+    struct connection *conn = bpf_map_lookup_elem(&active_connections, &cid);
+    if (!conn) {
+        //TODO 4 查询 
+        // cw_bpf_debug("socket accept bytes_sent cid.pid=%d, cid.fd=%d\n", cid.pid, cid.fd);
+        // struct connection *accept_conn = bpf_map_lookup_elem(&active_accepts, &cid);
+        // if (accept_conn && !is_tls) {
+        //     cw_bpf_debug("socket accept bytes_sent after cid.pid=%d, cid.fd=%d\n", cid.pid, cid.fd);
+        //     __sync_fetch_and_add(&accept_conn->bytes_sent, total_size);
+        // }
         return 0;
     }
 
+    if (!is_tls) {
+        __sync_fetch_and_add(&conn->bytes_sent, total_size);
+        if(conn->first_write_time == 0){
+            conn->first_write_time = bpf_ktime_get_ns();
+        }
+    }
+
     if (is_http_request(payload)) {
 	    cw_bpf_debug("");
 	    cw_bpf_debug("-----[Kernel HTTP Enter]:pid:[%d]|CURRENT-GOID:[%llu]|FD:[%d]", tid, get_current_goroutine(), k.fd);
@@ -495,7 +540,7 @@ int trace_enter_write(void *ctx, __u64 fd, __u16 is_tls, char *buf, __u64 size,
             e->method = METHOD_STATEMENT_CLOSE;
             e->payload_size = size;
             COPY_PAYLOAD(e->payload, size, payload);
-            send_event(ctx, e, k.pid, k.fd);
+            send_event(ctx, e, cid, conn);
             return 0;
         }
         req->protocol = PROTOCOL_POSTGRES;
@@ -519,7 +564,7 @@ int trace_enter_write(void *ctx, __u64 fd, __u16 is_tls, char *buf, __u64 size,
             e->payload_size = size;
             COPY_PAYLOAD(e->payload, size, payload);
 	        cw_bpf_debug("[Enter][Mysql][Send]:thread_id:%d\n",tid);
-            send_event(ctx, e, k.pid, k.fd);
+            send_event(ctx, e, cid, conn);
             return 0;
         }
         req->protocol = PROTOCOL_MYSQL;
@@ -565,7 +610,7 @@ int trace_enter_write(void *ctx, __u64 fd, __u16 is_tls, char *buf, __u64 size,
         }
         e->protocol = PROTOCOL_RABBITMQ;
         e->method = METHOD_PRODUCE;
-        send_event(ctx, e, k.pid, k.fd);
+        send_event(ctx, e, cid, conn);
         return 0;
     } else if (nats_method(payload, size) == METHOD_PRODUCE) {
         struct l7_event *e = bpf_map_lookup_elem(&l7_event_heap, &zero);
@@ -574,7 +619,7 @@ int trace_enter_write(void *ctx, __u64 fd, __u16 is_tls, char *buf, __u64 size,
         }
         e->protocol = PROTOCOL_NATS;
         e->method = METHOD_PRODUCE;
-        send_event(ctx, e, k.pid, k.fd);
+        send_event(ctx, e, cid, conn);
         return 0;
     } else if (is_cassandra_request(payload, size, &k.stream_id)) {
         req->protocol = PROTOCOL_CASSANDRA;
@@ -595,7 +640,7 @@ int trace_enter_write(void *ctx, __u64 fd, __u16 is_tls, char *buf, __u64 size,
         e->payload_size = size;
         e->trace_id = get_apm_trace_id(pid, tid);
         COPY_PAYLOAD(e->payload, size, payload);
-        send_event(ctx, e, k.pid, k.fd);
+        send_event(ctx, e, cid, conn);
         return 0;
     } else if (is_dubbo2_request(payload, size)) {
         req->protocol = PROTOCOL_DUBBO2;
@@ -615,7 +660,16 @@ int trace_enter_write(void *ctx, __u64 fd, __u16 is_tls, char *buf, __u64 size,
 }
 
 static inline __attribute__((__always_inline__))
-int trace_enter_read(__u64 id, __u64 fd, char *buf, __u64 *ret, __u64 iovlen) {
+int trace_enter_read(__u64 id, __u32 pid, __u64 fd, char *buf, __u64 *ret, __u64 iovlen) {
+    // struct connection_id cid = {};
+    // cid.pid = pid;
+    // cid.fd = fd;
+
+    // struct connection *conn = bpf_map_lookup_elem(&active_connections, &cid);
+    // if (!conn) {
+    //     // cw_bpf_debug("trace_enter_read no conn\n");
+    //     return 0;
+    // }
     struct read_args args = {};
     args.fd = fd;
     args.buf = buf;
@@ -637,7 +691,6 @@ int trace_exit_read(void *ctx, __u64 id, __u32 pid, __u16 is_tls, long int ret)
     if (!args) {
         return 0;
     }
-
     struct l7_request_key k = {};
     k.pid = pid;
     k.fd = args->fd;
@@ -657,7 +710,7 @@ int trace_exit_read(void *ctx, __u64 id, __u32 pid, __u16 is_tls, long int ret)
             return 0;
         }
     }
-
+    __u64 total_size = ret;
     int zero = 0;
     char* payload = args->buf;
     if (args->iovlen) {
@@ -665,7 +718,8 @@ int trace_exit_read(void *ctx, __u64 id, __u32 pid, __u16 is_tls, long int ret)
         if (!payload) {
             return 0;
         }
-        ret = read_iovec(args->buf, args->iovlen, ret, payload);
+        total_size = 0;
+        ret = read_iovec(args->buf, args->iovlen, ret, payload, &total_size);
         if (!ret) {
             return 0;
         }
@@ -698,7 +752,11 @@ int trace_exit_read(void *ctx, __u64 id, __u32 pid, __u16 is_tls, long int ret)
 //    infer_dns_message(payload, (int)PT_REGS_RC((struct pt_regs *)ctx),
 //                      conn_info);
 
+    struct connection_id cid = {};
+    cid.pid = pid;
+    cid.fd = args->fd;
     // 被调用方http入口
+    // 作为服务端在走。coroot 原有逻辑是没有的
     if (is_http_request(payload)) {
         struct l7_request *req = bpf_map_lookup_elem(&l7_request_heap, &zero);
         if (!req)
@@ -753,19 +811,50 @@ int trace_exit_read(void *ctx, __u64 id, __u32 pid, __u16 is_tls, long int ret)
 //	    bpf_map_update_elem(&fd_trace_info_heap, &fd_trace_key, &trace_info, BPF_NOEXIST);
         bpf_perf_event_output(ctx, &l7_events, BPF_F_CURRENT_CPU, e, sizeof(*e));
         cw_bpf_debug("[Receive][HTTP] to user space");
+
+        // 作为服务端统计 bytes_received 使用
+        // struct connection *accept_conn = bpf_map_lookup_elem(&active_accepts, &cid);
+        // cw_bpf_debug("socket accept bytes_received cid.pid=%d, cid.fd=%d\n", cid.pid, cid.fd);
+        // if (accept_conn && !is_tls){
+        //     cw_bpf_debug("socket accept bytes_received after cid.pid=%d, cid.fd=%d\n", cid.pid, cid.fd);
+        //     __sync_fetch_and_add(&accept_conn->bytes_received, total_size);
+        // }
         return 0;
     }
 
+    struct connection *conn = bpf_map_lookup_elem(&active_connections, &cid);
+    if (args && !conn) {
+        bpf_map_delete_elem(&active_reads, &id);
+        // struct connection *accept_conn = bpf_map_lookup_elem(&active_accepts, &cid);
+        // cw_bpf_debug("socket accept bytes_received cid.pid=%d, cid.fd=%d\n", cid.pid, cid.fd);
+        // if (accept_conn && !is_tls){
+        //     cw_bpf_debug("socket accept bytes_received after cid.pid=%d, cid.fd=%d\n", cid.pid, cid.fd);
+        //     __sync_fetch_and_add(&accept_conn->bytes_received, total_size);
+        // }
+        return 0;
+    }
+
+    //TODO 5 同发送逻辑。
+
+    /// coroot 是以客户端为主体做统计的,所以这里是客户端逻辑
+    if (!is_tls) {
+        __sync_fetch_and_add(&conn->bytes_received, total_size);
+        if(conn->first_read_time == 0){
+            conn->first_read_time = bpf_ktime_get_ns();
+        }
+        conn->new_read_time = bpf_ktime_get_ns();
+    }
+
     if (is_rabbitmq_consume(payload, ret)) {
         e->protocol = PROTOCOL_RABBITMQ;
         e->method = METHOD_CONSUME;
-        send_event(ctx, e, k.pid, k.fd);
+        send_event(ctx, e, cid, conn);
         return 0;
     }
     if (nats_method(payload, ret) == METHOD_CONSUME) {
         e->protocol = PROTOCOL_NATS;
         e->method = METHOD_CONSUME;
-        send_event(ctx, e, k.pid, k.fd);
+        send_event(ctx, e, cid, conn);
         return 0;
     }
 
@@ -785,7 +874,8 @@ int trace_exit_read(void *ctx, __u64 id, __u32 pid, __u16 is_tls, long int ret)
 	        e->duration = e->end_at - req->ns;
             e->payload_size = ret;
             COPY_PAYLOAD(e->payload, ret, payload);
-            send_event(ctx, e, k.pid, k.fd);
+            send_event(ctx, e, cid, conn);
+            bpf_map_delete_elem(&active_l7_requests, &k);
             return 0;
         } else if (is_cassandra_response(payload, ret, &k.stream_id, &e->status)) {
             req = bpf_map_lookup_elem(&active_l7_requests, &k);
@@ -800,7 +890,7 @@ int trace_exit_read(void *ctx, __u64 id, __u32 pid, __u16 is_tls, long int ret)
             e->payload_size = ret;
             e->trace_id = get_apm_trace_id(pid, tid);
             COPY_PAYLOAD(e->payload, ret, payload);
-            send_event(ctx, e, k.pid, k.fd);
+            send_event(ctx, e, cid, conn);
             return 0;
         } else {
 //	        cw_bpf_debug("bb 6:[0x%x] k.pid:%d, k.fd:%d",b[4],k.pid,k.fd);
@@ -1035,7 +1125,7 @@ int trace_exit_read(void *ctx, __u64 id, __u32 pid, __u16 is_tls, long int ret)
 	e->end_at = bpf_ktime_get_ns();
 	e->start_at = req->ns;
     e->duration = e->end_at - e->start_at;
-    send_event(ctx, e, k.pid, k.fd);
+    send_event(ctx, e, cid, conn);
     return 0;
 }
 
@@ -1089,13 +1179,15 @@ int sys_enter_sendto(struct trace_event_raw_sys_enter_rw__stub* ctx) {
 SEC("tracepoint/syscalls/sys_enter_read")
 int sys_enter_read(struct trace_event_raw_sys_enter_rw__stub* ctx) {
     __u64 id = bpf_get_current_pid_tgid();
-    return trace_enter_read(id, ctx->fd, ctx->buf, 0, 0);
+    __u32 pid = id >> 32;
+    return trace_enter_read(id, pid, ctx->fd, ctx->buf, 0, 0);
 }
 
 SEC("tracepoint/syscalls/sys_enter_readv")
 int sys_enter_readv(struct trace_event_raw_sys_enter_rw__stub* ctx) {
     __u64 id = bpf_get_current_pid_tgid();
-    return trace_enter_read(id, ctx->fd, ctx->buf, 0, ctx->size);
+    __u32 pid = id >> 32;
+    return trace_enter_read(id, pid, ctx->fd, ctx->buf, 0, ctx->size);
 }
 
 SEC("tracepoint/syscalls/sys_enter_recvmsg")
@@ -1105,13 +1197,15 @@ int sys_enter_recvmsg(struct trace_event_raw_sys_enter_rw__stub* ctx) {
     if (bpf_probe_read(&msghdr, sizeof(msghdr), (void *)ctx->buf)) {
         return 0;
     }
-    return trace_enter_read(id, ctx->fd, (char*)msghdr.msg_iov, 0, msghdr.msg_iovlen);
+    __u32 pid = id >> 32;
+    return trace_enter_read(id, pid, ctx->fd, (char*)msghdr.msg_iov, 0, msghdr.msg_iovlen);
 }
 
 SEC("tracepoint/syscalls/sys_enter_recvfrom")
 int sys_enter_recvfrom(struct trace_event_raw_sys_enter_rw__stub* ctx) {
     __u64 id = bpf_get_current_pid_tgid();
-    return trace_enter_read(id, ctx->fd, ctx->buf, 0, 0);
+    __u32 pid = id >> 32;
+    return trace_enter_read(id, pid, ctx->fd, ctx->buf, 0, 0);
 }
 
 SEC("tracepoint/syscalls/sys_exit_read")

+ 17 - 15
ebpftracer/ebpf/l7/openssl.c

@@ -69,23 +69,25 @@ struct ssl_st {
     return trace_enter_write(ctx, fd, 1, buf_ptr, buf_size, 0); \
 })
 
-#define READ_ENTER(ctx, bio_t)                      \
-({                                                  \
-    __u32 fd = GET_FD(ctx, bio_t, rbio);            \
-    char* buf_ptr = (char*)PT_REGS_PARM2(ctx);      \
-    __u64 pid_tgid = bpf_get_current_pid_tgid();    \
-    __u64 id = pid_tgid | IS_TLS_READ_ID;           \
-    return trace_enter_read(id, fd, buf_ptr, 0, 0); \
+#define READ_ENTER(ctx, bio_t)                           \
+({                                                       \
+    __u32 fd = GET_FD(ctx, bio_t, rbio);                 \
+    char* buf_ptr = (char*)PT_REGS_PARM2(ctx);           \
+    __u64 pid_tgid = bpf_get_current_pid_tgid();         \
+    __u32 pid = pid_tgid >> 32;                          \
+    __u64 id = pid_tgid | IS_TLS_READ_ID;                \
+    return trace_enter_read(id, pid, fd, buf_ptr, 0, 0); \
 })
 
-#define READ_EX_ENTER(ctx, bio_t)                           \
-({                                                          \
-    __u32 fd = GET_FD(ctx, bio_t, rbio);                    \
-    char* buf_ptr = (char*)PT_REGS_PARM2(ctx);              \
-    __u64 pid_tgid = bpf_get_current_pid_tgid();            \
-    __u64 id = pid_tgid | IS_TLS_READ_ID;                   \
-    __u64* ret_ptr = (__u64*)PT_REGS_PARM4(ctx);            \
-    return trace_enter_read(id, fd, buf_ptr, ret_ptr, 0);   \
+#define READ_EX_ENTER(ctx, bio_t)                              \
+({                                                             \
+    __u32 fd = GET_FD(ctx, bio_t, rbio);                       \
+    char* buf_ptr = (char*)PT_REGS_PARM2(ctx);                 \
+    __u64 pid_tgid = bpf_get_current_pid_tgid();               \
+    __u64 id = pid_tgid | IS_TLS_READ_ID;                      \
+    __u32 pid = pid_tgid >> 32;                                \
+    __u64* ret_ptr = (__u64*)PT_REGS_PARM4(ctx);               \
+    return trace_enter_read(id, pid, fd, buf_ptr, ret_ptr, 0); \
 })
 
 SEC("uprobe/openssl_SSL_write_enter")

+ 42 - 0
ebpftracer/ebpf/python.c

@@ -0,0 +1,42 @@
+struct { // perf event array that pthread_cond_timedwait_exit writes python_thread_event records to
+    __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+    __uint(key_size, sizeof(int));
+    __uint(value_size, sizeof(int));
+} python_thread_events SEC(".maps");
+
+struct { // start time of the pthread_cond_timedwait call currently tracked for each caller
+    __uint(type, BPF_MAP_TYPE_HASH);
+    __uint(key_size, sizeof(__u64)); // key: value of bpf_get_current_pid_tgid()
+    __uint(value_size, sizeof(__u64)); // value: bpf_ktime_get_ns() taken in the enter probe
+    __uint(max_entries, 10240);
+} python_thread_locks SEC(".maps");
+
+// Entry probe: records when the current caller entered pthread_cond_timedwait.
+// The timestamp is stored in python_thread_locks under the caller's pid_tgid,
+// overwriting any previous value for that key.
+SEC("uprobe/pthread_cond_timedwait_enter")
+int pthread_cond_timedwait_enter(struct pt_regs *ctx) {
+    __u64 key = bpf_get_current_pid_tgid();
+    __u64 entered_at = bpf_ktime_get_ns();
+    bpf_map_update_elem(&python_thread_locks, &key, &entered_at, BPF_ANY);
+    return 0;
+}
+
+struct python_thread_event { // record emitted by pthread_cond_timedwait_exit via python_thread_events
+    __u32 type; // set to EVENT_TYPE_PYTHON_THREAD_LOCK
+    __u32 pid; // upper 32 bits of bpf_get_current_pid_tgid()
+    __u64 duration; // bpf_ktime_get_ns() delta between the enter and exit probes
+};
+
+// Exit probe: reports how long the current caller spent inside
+// pthread_cond_timedwait.
+//
+// It looks up the start time stored by the enter probe under the caller's
+// pid_tgid. With no entry it does nothing. Otherwise it emits a
+// python_thread_event with the elapsed time to python_thread_events.
+SEC("uprobe/pthread_cond_timedwait_exit")
+int pthread_cond_timedwait_exit(struct pt_regs *ctx) {
+    __u64 pid_tgid = bpf_get_current_pid_tgid();
+    __u64 *timestamp = bpf_map_lookup_elem(&python_thread_locks, &pid_tgid);
+    if (!timestamp) {
+        return 0;
+    }
+    // Copy the start time out, then drop the entry. python_thread_locks is a
+    // plain (non-LRU) hash, so entries left behind by threads that never call
+    // pthread_cond_timedwait again would otherwise pile up until the map is
+    // full and new callers can no longer be tracked.
+    __u64 started_at = *timestamp;
+    bpf_map_delete_elem(&python_thread_locks, &pid_tgid);
+    struct python_thread_event e = {
+        .type = EVENT_TYPE_PYTHON_THREAD_LOCK,
+        .pid = pid_tgid >> 32,
+        .duration = bpf_ktime_get_ns()-started_at,
+    };
+    bpf_perf_event_output(ctx, &python_thread_events, BPF_F_CURRENT_CPU, &e, sizeof(e));
+    return 0;
+}

+ 417 - 42
ebpftracer/ebpf/tcp/state.c

@@ -1,12 +1,19 @@
 #ifndef IPPROTO_TCP
 #define IPPROTO_TCP 6
 #endif
+#define MAX_CONNECTIONS 1000000
 
 struct tcp_event {
     __u64 fd;
     __u64 timestamp;
+    __u64 duration;
+    __u64 first_read_time;
+    __u64 first_write_time;
+    __u64 new_read_time;
     __u32 type;
     __u32 pid;
+    __u64 bytes_sent;
+    __u64 bytes_received;
     __u16 sport;
     __u16 dport;
     __u8 saddr[16];
@@ -25,6 +32,12 @@ struct {
     __uint(value_size, sizeof(int));
 } tcp_connect_events SEC(".maps");
 
+struct { // perf event array; NOTE(review): presumably carries accept open/close tcp_event records - producer not visible here, confirm
+    __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+    __uint(key_size, sizeof(int));
+    __uint(value_size, sizeof(int));
+} tcp_accept_events SEC(".maps");
+
 struct trace_event_raw_inet_sock_set_state__stub {
     __u64 unused;
     void *skaddr;
@@ -51,23 +64,48 @@ struct {
     __uint(max_entries, 10240);
 } fd_by_pid_tgid SEC(".maps");
 
-struct sk_info {
+struct connection_id {
     __u64 fd;
     __u32 pid;
 };
+
 struct {
-    __uint(type, BPF_MAP_TYPE_HASH);
+    __uint(type, BPF_MAP_TYPE_LRU_HASH);
     __uint(key_size, sizeof(void *));
-    __uint(value_size, sizeof(struct sk_info));
-    __uint(max_entries, 10240);
-} sk_info SEC(".maps");
+    __uint(value_size, sizeof(struct connection_id));
+    __uint(max_entries, MAX_CONNECTIONS);
+} connection_id_by_socket SEC(".maps");
+
+struct connection { // per-connection state stored in active_connections, keyed by connection_id
+    __u64 timestamp; // bpf_ktime_get_ns() taken in inet_sock_set_state on CLOSE -> SYN_SENT; copied into l7_event.connection_timestamp by send_event
+    __u64 bytes_sent; // incremented by trace_enter_write for non-TLS writes
+    __u64 bytes_received; // incremented by trace_exit_read for non-TLS reads
+    __u64 first_read_time; // bpf_ktime_get_ns() of the first counted non-TLS read; 0 until then
+    __u64 first_write_time; // bpf_ktime_get_ns() of the first counted non-TLS write; 0 until then
+    __u64 new_read_time; // bpf_ktime_get_ns() of the most recent counted non-TLS read
+};
+
+struct accept_connection { // endpoint info stored in active_accepts; trace_enter_write copies it into l7_event
+    __u16 sport; // copied to l7_event.sport
+    __u16 dport; // copied to l7_event.dport
+    __u8 saddr[16]; // copied to l7_event.saddr; NOTE(review): address encoding not shown here - confirm
+    __u8 daddr[16]; // copied to l7_event.daddr
+};
 
 struct {
     __uint(type, BPF_MAP_TYPE_LRU_HASH);
-    __uint(key_size, sizeof(struct sk_info));
-    __uint(value_size, sizeof(__u64));
-    __uint(max_entries, 32768);
-} connection_timestamps SEC(".maps");
+    __uint(key_size, sizeof(struct connection_id));
+    __uint(value_size, sizeof(struct connection));
+    __uint(max_entries, MAX_CONNECTIONS);
+} active_connections SEC(".maps");
+
+struct { // accept_connection entries looked up by trace_enter_write; LRU, so old entries can be evicted
+    __uint(type, BPF_MAP_TYPE_LRU_HASH);
+    __uint(key_size, sizeof(struct connection_id)); // key: {fd, pid} of the accepted socket
+    __uint(value_size, sizeof(struct accept_connection));
+    __uint(max_entries, MAX_CONNECTIONS);
+} active_accepts SEC(".maps");
+
 
 SEC("tracepoint/sock/inet_sock_set_state")
 int inet_sock_set_state(void *ctx)
@@ -81,47 +119,62 @@ int inet_sock_set_state(void *ctx)
     }
     __u64 id = bpf_get_current_pid_tgid();
     __u32 pid = id >> 32;
+    cw_bpf_debug("fucksocket pid=%lld inet_sock_set_state -- args.oldstate=%lld, args.newstate=%lld\n", pid, args.oldstate, args.newstate);
+    cw_bpf_debug("fucksocket pid=%lld inet_sock_set_state -- id=%lld\n", pid, id);
 
     if (args.oldstate == BPF_TCP_CLOSE && args.newstate == BPF_TCP_SYN_SENT) {
+        
         __u64 *fdp = bpf_map_lookup_elem(&fd_by_pid_tgid, &id);
 
         if (!fdp) {
             return 0;
         }
-        struct sk_info i = {};
-        i.pid = pid;
-        i.fd = *fdp;
+        struct connection_id cid = {};
+        cid.pid = pid;
+        cid.fd = *fdp;
+
+        struct connection conn = {};
+        conn.timestamp = bpf_ktime_get_ns();
+        conn.first_read_time = 0;
+        conn.first_write_time = 0;
+        conn.new_read_time = 0;
+
         bpf_map_delete_elem(&fd_by_pid_tgid, &id);
-        bpf_map_update_elem(&sk_info, &args.skaddr, &i, BPF_ANY);
+        bpf_map_update_elem(&connection_id_by_socket, &args.skaddr, &cid, BPF_ANY);
+        bpf_map_update_elem(&active_connections, &cid, &conn, BPF_ANY);
         return 0;
     }
 
     __u64 fd = 0;
     __u32 type = 0;
     __u64 timestamp = 0;
+    __u64 duration = 0;
     void *map = &tcp_connect_events;
+
+    struct tcp_event e = {};
+
     if (args.oldstate == BPF_TCP_SYN_SENT) {
-        struct sk_info *i = bpf_map_lookup_elem(&sk_info, &args.skaddr);
-        if (!i) {
+        struct connection_id *cid = bpf_map_lookup_elem(&connection_id_by_socket, &args.skaddr);
+        if (!cid) {
+            return 0;
+        }
+        struct connection *conn = bpf_map_lookup_elem(&active_connections, cid);
+        if (!conn) {
             return 0;
         }
         if (args.newstate == BPF_TCP_ESTABLISHED) {
-            timestamp = bpf_ktime_get_ns();
-            struct sk_info k = {};
-            k.pid = i->pid;
-            k.fd = i->fd;
-            bpf_map_update_elem(&connection_timestamps, &k, &timestamp, BPF_ANY);
+            timestamp = conn->timestamp;
             type = EVENT_TYPE_CONNECTION_OPEN;
         } else if (args.newstate == BPF_TCP_CLOSE) {
+            bpf_map_delete_elem(&active_connections, cid);
             type = EVENT_TYPE_CONNECTION_ERROR;
         }
-        pid = i->pid;
-        fd = i->fd;
-        bpf_map_delete_elem(&sk_info, &args.skaddr);
+        duration = bpf_ktime_get_ns() - conn->timestamp;
+        pid = cid->pid;
+        fd = cid->fd;
     }
     if (args.oldstate == BPF_TCP_ESTABLISHED && (args.newstate == BPF_TCP_FIN_WAIT1 || args.newstate == BPF_TCP_CLOSE_WAIT)) {
-        pid = 0;
-        type = EVENT_TYPE_CONNECTION_CLOSE;
+        bpf_map_delete_elem(&connection_id_by_socket, &args.skaddr);
     }
     if (args.oldstate == BPF_TCP_CLOSE && args.newstate == BPF_TCP_LISTEN) {
         type = EVENT_TYPE_LISTEN_OPEN;
@@ -135,13 +188,17 @@ int inet_sock_set_state(void *ctx)
     if (type == 0) {
         return 0;
     }
-
-    struct tcp_event e = {};
     e.type = type;
+    e.duration = duration;
     e.timestamp = timestamp;
+    e.first_read_time = 0;
+    e.first_write_time = 0;
+    e.new_read_time = 0;
     e.pid = pid;
     e.sport = args.sport;
     e.dport = args.dport;
+    // e.sport = bpf_ntohs(args.sport);  
+    // e.dport = bpf_ntohs(args.dport);
     e.fd = fd;
     __builtin_memcpy(&e.saddr, &args.saddr_v6, sizeof(e.saddr));
     __builtin_memcpy(&e.daddr, &args.daddr_v6, sizeof(e.saddr));
@@ -164,40 +221,358 @@ int sys_enter_connect(void *ctx) {
         return 0;
     }
     __u64 id = bpf_get_current_pid_tgid();
+    __u64 pid = id >> 32;
+    cw_bpf_debug("fucksocket pid=%lld sys_enter_connect -- id=%lld, fd=%lld\n", pid, id, args.fd);
     bpf_map_update_elem(&fd_by_pid_tgid, &id, &args.fd, BPF_ANY);
     return 0;
 }
 
 SEC("tracepoint/syscalls/sys_exit_connect")
-int sys_exit_connect(void *ctx) {
+int sys_exit_connect(struct trace_event_raw_sys_exit__stub* ctx) {
     __u64 id = bpf_get_current_pid_tgid();
+    __u64 *fdp = bpf_map_lookup_elem(&fd_by_pid_tgid, &id);
+    if (!fdp) {
+        return 0;
+    }
+    struct connection_id cid = {};
+    cid.pid = id >> 32;
+    cid.fd = *fdp;
+    struct connection *conn = bpf_map_lookup_elem(&active_connections, &cid);
+    if (!conn && ctx->ret == 0) { // non-TCP connection
+        struct connection conn = {};
+        conn.timestamp = bpf_ktime_get_ns();
+        conn.first_read_time = 0;
+        conn.first_write_time = 0;
+        cw_bpf_debug("fucksocket pid=%lld sys_exit_connect -- id=%lld, fd=%lld\n", cid.pid, id, cid.fd);
+        bpf_map_update_elem(&active_connections, &cid, &conn, BPF_ANY);
+    }
     bpf_map_delete_elem(&fd_by_pid_tgid, &id);
     return 0;
 }
 
-static inline __attribute__((__always_inline__))
-int trace_exit_accept(struct trace_event_raw_sys_exit__stub* ctx) {
-    if (ctx->ret < 0) {
+// Tracepoint on close(2) entry.
+//
+// If (pid, fd) is a tracked connection, an EVENT_TYPE_CONNECTION_CLOSE event
+// carrying its byte counters and read/write timestamps is sent on
+// tcp_connect_events and the entry is removed from active_connections. Any
+// accepted connection tracked under the same (pid, fd) is removed from
+// active_accepts as well.
+SEC("tracepoint/syscalls/sys_enter_close")
+int sys_enter_close(void *ctx) {
+    struct trace_event_raw_args_with_fd__stub args = {};
+    if (bpf_probe_read(&args, sizeof(args), ctx) < 0) {
         return 0;
     }
     __u64 id = bpf_get_current_pid_tgid();
-    struct sk_info k = {};
-    k.pid = id >> 32;
-    k.fd = ctx->ret;
-    __u64 invalid_timestamp = 0;
-    bpf_map_update_elem(&connection_timestamps, &k, &invalid_timestamp, BPF_ANY);
+    struct connection_id cid = {};
+    cid.pid = id >> 32;
+    cid.fd = args.fd;
+    cw_bpf_debug("socket accept socket sys_enter_close connection before -- cid.pid=%lld, cid.fd=%lld\n", cid.pid, cid.fd);
+    struct connection *conn = bpf_map_lookup_elem(&active_connections, &cid);
+    if (conn) {
+        cw_bpf_debug("socket accept socket sys_enter_close connection bytes_sent=%lld, bytes_received=%lld\n", conn->bytes_sent, conn->bytes_received);
+        struct tcp_event e = {};
+        e.type = EVENT_TYPE_CONNECTION_CLOSE;
+        e.pid = cid.pid;
+        e.fd = cid.fd;
+        e.bytes_sent = conn->bytes_sent;
+        e.bytes_received = conn->bytes_received;
+        e.timestamp = conn->timestamp;
+        e.first_read_time = conn->first_read_time;
+        e.first_write_time = conn->first_write_time;
+        e.new_read_time = conn->new_read_time;
+        bpf_perf_event_output(ctx, &tcp_connect_events, BPF_F_CURRENT_CPU, &e, sizeof(e));
+        bpf_map_delete_elem(&active_connections, &cid);
+    }
+    cw_bpf_debug("socket accept socket sys_enter_close accept_Connection before cid.pid=%d, cid.fd=%d\n", cid.pid, cid.fd);
+    struct accept_connection *acceptConn = bpf_map_lookup_elem(&active_accepts, &cid);
+    if (acceptConn) {
+        // TODO: emit an EVENT_TYPE_ACCEPT_CLOSE event on tcp_accept_events here,
+        // mirroring the connection-close event above. struct accept_connection
+        // does not carry byte counters or a timestamp yet, so the event cannot
+        // report traffic until those are tracked.
+        bpf_map_delete_elem(&active_accepts, &cid);
+    }
     return 0;
 }
 
-SEC("tracepoint/syscalls/sys_exit_accept")
-int sys_exit_accept(struct trace_event_raw_sys_exit__stub* ctx) {
-    return trace_exit_accept(ctx);
+// Stores the 32-bit IPv4 address `ip` into a 16-byte address buffer.
+//
+// The four bytes of `ip` are written to bytes[12..15], least significant
+// byte first, and bytes[10..11] are set to 0xFF. bytes[0..9] are left
+// untouched, so the caller must pass a zero-initialised buffer to obtain
+// the ::ffff:a.b.c.d (IPv4-mapped) form. `bytes` must be at least 16 bytes.
+//
+// Kept static and always-inline so the buffer pointer never crosses a
+// BPF-to-BPF call boundary.
+static inline __attribute__((__always_inline__))
+void u32_to_ip(__u32 ip, unsigned char* bytes) {
+    // Split the 32-bit value into its four octets.
+    bytes[15] = (ip >> 24) & 0xFF;
+    bytes[14] = (ip >> 16) & 0xFF;
+    bytes[13] = (ip >> 8) & 0xFF;
+    bytes[12] = ip & 0xFF;
+    bytes[11] = 0xFF;
+    bytes[10] = 0xFF;
+
+    // Debug trace of the octets just written.
+    cw_bpf_debug("[Go] [socket/IP: %u.%u", bytes[15], bytes[14]);
+    cw_bpf_debug("[Go] [socket/IP: %u.%u", bytes[13], bytes[12]);
+}
+
+
+// Hands the `struct sock` pointer returned by inet_csk_accept over to the
+// accept exit tracepoints, which pair it with the returned file descriptor.
+struct {  
+    __uint(type, BPF_MAP_TYPE_HASH);  
+    __type(key, __u64);  // keyed by bpf_get_current_pid_tgid()
+    __type(value, struct sock *);  
+    __uint(max_entries, 1024);  
+} socket_map SEC(".maps");  
+
+
+// Scratch holder for the IPv4 addresses and ports read from a socket's
+// __sk_common in the accept exit tracepoints. `protocol` is not assigned by
+// the code visible in this file section.
+struct ipv4_tuple_t {  
+    __u32 saddr;  
+    __u32 daddr;  
+    __u16 sport;  
+    __u16 dport;  
+    __u8  protocol;  
+};
+
+// kretprobe on inet_csk_accept: remembers the returned `struct sock` pointer
+// in socket_map under the current pid_tgid. The accept exit tracepoints look
+// it up there and delete the entry again.
+SEC("kretprobe/inet_csk_accept")
+int kprobeinet_csk_accept(struct pt_regs *ctx) {
+    cw_bpf_debug("socket inet_csk_accept Connection exit pid_tgid: pid_tgid=\n");
+    __u64 pid_tgid = bpf_get_current_pid_tgid();
+    cw_bpf_debug("socket inet_csk_accept Connection exit pid_tgid: pid_tgid=%d\n", pid_tgid);
+    struct sock *sk = (struct sock *)PT_REGS_RC(ctx);
+    // __u16 family = 0;
+    // bpf_probe_read(&family, sizeof(family), &sk->__sk_common.skc_family);
+    // cw_bpf_debug("socket inet_csk_accept Connection family: family=%d\n", family);
+    // if (family == AF_INET)
+	// {
+    //     cw_bpf_debug("socket inet_csk_accept Connection family: IPv4=%d\n", family);
+    // }
+    // struct ipv4_tuple_t tuple = {};  
+    // // Read the fields from __sk_common
+    // bpf_probe_read(&tuple.saddr, sizeof(tuple.saddr), &sk->__sk_common.skc_rcv_saddr);  
+    // bpf_probe_read(&tuple.daddr, sizeof(tuple.daddr), &sk->__sk_common.skc_daddr);  
+    // bpf_probe_read(&tuple.sport, sizeof(tuple.sport), &sk->__sk_common.skc_num);  
+    // bpf_probe_read(&tuple.dport, sizeof(tuple.dport), &sk->__sk_common.skc_dport);  
+
+    // tuple.sport = bpf_ntohs(tuple.sport);  
+    // tuple.dport = bpf_ntohs(tuple.dport);
+
+    // __u64 hash;
+    // bpf_probe_read(&hash, sizeof(hash), &sk->__sk_common.skc_hash);
+
+    // cw_bpf_debug("socket inet_csk_accept Connection accepted: sk=%x, hash: %lld\n", sk, hash);
+    // cw_bpf_debug("socket inet_csk_accept Connection accepted: dport=%d, lport=%d\n", tuple.dport, tuple.sport);
+    // cw_bpf_debug("socket inet_csk_accept Connection accepted: saddr=%lld, daddr=%lld\n", tuple.saddr, tuple.daddr);
+    // u32_to_ip(tuple.saddr);
+    // u32_to_ip(tuple.daddr);
+    // Associate the current pid_tgid with the `struct sock` pointer.
+    bpf_map_update_elem(&socket_map, &pid_tgid, &sk, BPF_ANY);  
+
+    return 0;
 }
 
-SEC("tracepoint/syscalls/sys_exit_accept4")
-int sys_exit_accept4(struct trace_event_raw_sys_exit__stub* ctx) {
-    return trace_exit_accept(ctx);
+// Context layout for the sys_exit_accept4 tracepoint: two header fields that
+// are not read, followed by the syscall return value.
+struct sys_exit_accept4_ctx {
+	__u64 __unused_syscall_header;
+	__u32 __unused_syscall_nr;
+	long ret;
+};
+// Context layout for the sys_enter_accept4 tracepoint. Only `fd` is read by
+// the program below.
+// NOTE(review): accept4(2) takes a pointer to the address length and a
+// fourth `flags` argument; confirm the remaining fields against the
+// tracepoint format before relying on them.
+struct sys_enter_accept4_ctx {
+	__u64 __unused_syscall_header;
+	__u32 __unused_syscall_nr;
+
+	long fd;
+	__u64 *sockaddr;
+	int addrlen;
+};
+
+// Context layout for the sys_exit_accept tracepoint; same fields as
+// struct sys_exit_accept4_ctx.
+struct sys_exit_accept_ctx {
+	__u64 __unused_syscall_header;
+	__u32 __unused_syscall_nr;
+	long ret;
+};
+// Tracepoint on accept4(2) entry. It only emits a debug trace with the
+// current pid_tgid and the listening fd; no map is touched.
+SEC("tracepoint/syscalls/sys_enter_accept4")  
+int tracepoint__sys_enter_accept4(struct sys_enter_accept4_ctx *ctx) {  
+    __u64 pid_tgid = bpf_get_current_pid_tgid();  
+    cw_bpf_debug("[Go] [socket/tracepoint__sys_entry_accept4]getget: rdi_ptr::pid: %d,-- %d\n", pid_tgid, ctx->fd);
+    return 0;  
+}  
+
+// Tracepoint on accept(2) entry. It only emits a debug trace with the
+// current pid_tgid; no map is touched.
+SEC("tracepoint/syscalls/sys_enter_accept")  
+int tracepoint__sys_enter_accept(struct trace_event_raw_sys_enter *ctx) {  
+    __u64 pid_tgid = bpf_get_current_pid_tgid();  
+    cw_bpf_debug("[Go] [socket/tracepoint__sys_entry_accept----]getget: rdi_ptr::pid: %d\n", pid_tgid);  
+    return 0;  
+} 
+
+// Shared body of the accept(2) and accept4(2) exit tracepoints.
+//
+// Pairs the file descriptor returned by the syscall with the `struct sock`
+// pointer that the inet_csk_accept kretprobe stored in socket_map for this
+// pid_tgid. On success it sends an EVENT_TYPE_ACCEPT_OPEN event on
+// tcp_accept_events and records the endpoints in active_accepts under
+// (pid, fd). The socket_map entry is removed in every case.
+//
+// ctx: tracepoint context, forwarded to bpf_perf_event_output.
+// fd:  syscall return value; negative values are errors and are ignored.
+//
+// NOTE(review): only the IPv4 fields of __sk_common are read, whatever the
+// socket family is; native IPv6 peers presumably need the skc_v6_* fields.
+static inline __attribute__((__always_inline__))
+int trace_exit_accept(void *ctx, long fd) {
+    __u64 pid_tgid = bpf_get_current_pid_tgid();
+    struct sock **skp = bpf_map_lookup_elem(&socket_map, &pid_tgid);
+    // 0 is a valid file descriptor; only negative return values are errors.
+    if (skp && fd >= 0) {
+        struct sock *sk = *skp;
+        __u16 family = 0;
+        bpf_probe_read(&family, sizeof(family), &sk->__sk_common.skc_family);
+        cw_bpf_debug("socket sys_exit_accept family: family=%d\n", family);
+
+        struct ipv4_tuple_t tuple = {};
+        bpf_probe_read(&tuple.saddr, sizeof(tuple.saddr), &sk->__sk_common.skc_rcv_saddr);
+        bpf_probe_read(&tuple.daddr, sizeof(tuple.daddr), &sk->__sk_common.skc_daddr);
+        bpf_probe_read(&tuple.sport, sizeof(tuple.sport), &sk->__sk_common.skc_num);
+        bpf_probe_read(&tuple.dport, sizeof(tuple.dport), &sk->__sk_common.skc_dport);
+        // Only the peer port is byte-swapped; the local port is used as read.
+        tuple.dport = bpf_ntohs(tuple.dport);
+
+        // skc_hash is a 32-bit field: read exactly that many bytes.
+        __u32 hash = 0;
+        bpf_probe_read(&hash, sizeof(hash), &sk->__sk_common.skc_hash);
+        cw_bpf_debug("socket sys_exit_accept sk=%x, hash: %lld\n", sk, (__u64)hash);
+        cw_bpf_debug("socket sys_exit_accept dport=%d, lport=%d\n", tuple.dport, tuple.sport);
+        cw_bpf_debug("socket sys_exit_accept saddr=%lld, daddr=%lld\n", tuple.saddr, tuple.daddr);
+
+        // Zero-initialised so u32_to_ip yields the ::ffff:a.b.c.d form.
+        unsigned char saddr[16] = {};
+        unsigned char daddr[16] = {};
+        u32_to_ip(tuple.saddr, saddr);
+        u32_to_ip(tuple.daddr, daddr);
+
+        // Report the accepted connection to user space.
+        struct tcp_event e = {};
+        e.type = EVENT_TYPE_ACCEPT_OPEN;
+        e.duration = 0;
+        e.timestamp = 0;
+        e.pid = pid_tgid >> 32;
+        e.sport = tuple.sport;
+        e.dport = tuple.dport;
+        e.fd = fd;
+        __builtin_memcpy(&e.saddr, &saddr, sizeof(e.saddr));
+        __builtin_memcpy(&e.daddr, &daddr, sizeof(e.daddr));
+        bpf_perf_event_output(ctx, &tcp_accept_events, BPF_F_CURRENT_CPU, &e, sizeof(e));
+
+        // Remember the endpoints until the fd is closed (see sys_enter_close).
+        struct connection_id cid = {};
+        cid.pid = pid_tgid >> 32;
+        cid.fd = fd;
+
+        struct accept_connection conn = {};
+        conn.sport = tuple.sport;
+        conn.dport = tuple.dport;
+        __builtin_memcpy(&conn.saddr, &saddr, sizeof(conn.saddr));
+        __builtin_memcpy(&conn.daddr, &daddr, sizeof(conn.daddr));
+        cw_bpf_debug("socket accept update active_accepts cid.pid=%d, cid.fd=%lld\n", cid.pid, cid.fd);
+        bpf_map_update_elem(&active_accepts, &cid, &conn, BPF_ANY);
+    }
+
+    // Always drop the hand-over entry so socket_map does not leak.
+    bpf_map_delete_elem(&socket_map, &pid_tgid);
+    return 0;
 }
 
+// Tracepoint on accept(2) exit: ctx->ret is the new fd or a negative errno.
+SEC("tracepoint/syscalls/sys_exit_accept")
+int sys_exit_accept(struct sys_exit_accept_ctx *ctx)
+{
+    long fd = ctx->ret;
+    __u64 pid_tgid = bpf_get_current_pid_tgid();
+    cw_bpf_debug("[Go] [socket/tracepoint__sys_exit_accept-----]getget: rdi_ptr::pid: %d,-- %d\n", pid_tgid, fd);
+    return trace_exit_accept(ctx, fd);
+}
 
+// Tracepoint on accept4(2) exit: ctx->ret is the new fd or a negative errno.
+SEC("tracepoint/syscalls/sys_exit_accept4")
+int tracepoint__sys_exit_accept4(struct sys_exit_accept4_ctx *ctx) {
+    long fd = ctx->ret;
+    __u64 pid_tgid = bpf_get_current_pid_tgid();
+    cw_bpf_debug("[Go] [socket/tracepoint__sys_exit_accept4]getget: rdi_ptr::pid: %d,-- %d\n", pid_tgid, fd);
+    return trace_exit_accept(ctx, fd);
+}
 

+ 5 - 3
ebpftracer/init.go

@@ -23,17 +23,18 @@ func readFds(pids []uint32) (files []file, socks []sock) {
 	for _, pid := range pids {
 		ns, err := proc.GetNetNs(pid)
 		if err != nil {
+			klog.Warningf("failed to get net ns for %d: %s", pid, err)
 			continue
 		}
 		nsId := ns.UniqueId()
 		sockets, ok := nss[nsId]
 		_ = ns.Close()
 		if !ok {
-			sockets = map[string]sock{}
-			nss[nsId] = sockets
 			if ss, err := proc.GetSockets(pid); err != nil {
-				klog.Warningln(err)
+				klog.Warningf("failed to get sockets for %d: %s", pid, err)
 			} else {
+				sockets = map[string]sock{}
+				nss[nsId] = sockets
 				for _, s := range ss {
 					sockets[s.Inode] = sock{Sock: s}
 				}
@@ -42,6 +43,7 @@ func readFds(pids []uint32) (files []file, socks []sock) {
 
 		fds, err := proc.ReadFds(pid)
 		if err != nil {
+			klog.Warningf("failed to read fds for %d: %s", pid, err)
 			continue
 		}
 		for _, fd := range fds {

+ 3 - 0
ebpftracer/l7/l7.go

@@ -3,6 +3,7 @@ package l7
 import (
 	"strconv"
 	"time"
+	"inet.af/netaddr"
 )
 
 type Protocol uint8
@@ -155,6 +156,8 @@ type RequestData struct {
 	SpanId            string
 	StartAt           uint64
 	EndAt             uint64
+    SAddr          	  netaddr.IPPort
+	DAddr          	  netaddr.IPPort
 	ParentSpanContext struct {
 		TraceIdFrom    string
 		CalledId       string

+ 86 - 0
ebpftracer/python.go

@@ -0,0 +1,86 @@
+package ebpftracer
+
+import (
+	"bufio"
+	"os"
+	"regexp"
+	"strings"
+
+	"github.com/cilium/ebpf/link"
+	"github.com/coroot/coroot-node-agent/proc"
+	"golang.org/x/exp/maps"
+	"k8s.io/klog/v2"
+)
+
+var (
+	libcRegexp = regexp.MustCompile(`libc[\.-]`)
+	muslRegexp = regexp.MustCompile(`musl[\.-]`)
+)
+
+// AttachPythonThreadLockProbes attaches the pthread_cond_timedwait
+// uprobe/uretprobe pair to the first library returned by getPthreadLibs for
+// pid on which both probes attach, and returns the two links.
+//
+// It returns nil when no library could be instrumented. A pair is only
+// handed to the caller when both halves attached: an enter probe whose exit
+// probe failed is closed right away instead of being returned on its own or
+// leaked by a later early return.
+func (t *Tracer) AttachPythonThreadLockProbes(pid uint32) []link.Link {
+	log := func(libPath, msg string, err error) {
+		if err != nil {
+			// Errors caused by the process or library going away are expected; stay quiet.
+			for _, s := range []string{"no such file or directory", "no such process", "permission denied"} {
+				if strings.HasSuffix(err.Error(), s) {
+					return
+				}
+			}
+			klog.ErrorfDepth(1, "pid=%d lib=%s: %s: %s", pid, libPath, msg, err)
+			return
+		}
+		klog.InfofDepth(1, "pid=%d lib=%s: %s", pid, libPath, msg)
+	}
+
+	var (
+		lastErr error
+		libPath string
+	)
+
+	for _, libPath = range getPthreadLibs(pid) {
+		exe, err := link.OpenExecutable(libPath)
+		if err != nil {
+			log(libPath, "failed to open executable", err)
+			return nil
+		}
+		var uprobe, uretprobe link.Link
+		uprobe, lastErr = exe.Uprobe("pthread_cond_timedwait", t.uprobes["pthread_cond_timedwait_enter"], nil)
+		if lastErr != nil {
+			continue
+		}
+		uretprobe, lastErr = exe.Uretprobe("pthread_cond_timedwait", t.uprobes["pthread_cond_timedwait_exit"], nil)
+		if lastErr != nil {
+			// An enter probe without its exit probe is useless; release it.
+			// The close error is ignored because the attach error is the one reported.
+			_ = uprobe.Close()
+			continue
+		}
+		log(libPath, "python uprobes attached", nil)
+		return []link.Link{uprobe, uretprobe}
+	}
+	if lastErr != nil {
+		log(libPath, "failed to attach uprobe", lastErr)
+	}
+	return nil
+}
+
+// getPthreadLibs reads the process's "maps" file (located through proc.Path)
+// and returns the distinct mapped paths that look like libc, musl or
+// libpthread, each rewritten through proc.Path(pid, "root", ...).
+// It returns nil when the maps file cannot be opened. The order of the
+// result is unspecified.
+func getPthreadLibs(pid uint32) []string {
+	mapsFile, err := os.Open(proc.Path(pid, "maps"))
+	if err != nil {
+		return nil
+	}
+	defer mapsFile.Close()
+
+	found := map[string]bool{}
+	// bufio.Scanner splits on lines by default.
+	for lines := bufio.NewScanner(mapsFile); lines.Scan(); {
+		fields := strings.Fields(lines.Text())
+		// The mapped path is the sixth column; lines without one are skipped.
+		if len(fields) < 6 {
+			continue
+		}
+		mapped := fields[5]
+		switch {
+		case libcRegexp.MatchString(mapped),
+			muslRegexp.MatchString(mapped),
+			strings.Contains(mapped, "libpthread"):
+			found[proc.Path(pid, "root", mapped)] = true
+		}
+	}
+	return maps.Keys(found)
+}

+ 20 - 8
ebpftracer/tls.go

@@ -7,6 +7,11 @@ import (
 	"debug/elf"
 	"errors"
 	"fmt"
+	"os"
+	"regexp"
+	"strconv"
+	"strings"
+
 	"github.com/cilium/ebpf/link"
 	"github.com/coroot/coroot-node-agent/ebpftracer/tracer"
 	"github.com/coroot/coroot-node-agent/proc"
@@ -15,10 +20,6 @@ import (
 	"golang.org/x/arch/arm64/arm64asm"
 	"golang.org/x/arch/x86/x86asm"
 	"golang.org/x/mod/semver"
-	"os"
-	"regexp"
-	"strconv"
-	"strings"
 )
 
 const (
@@ -86,24 +87,32 @@ func (t *Tracer) AttachOpenSslUprobes(pid uint32) ([]link.Link, error) {
 	}
 	progs := []prog{
 		{symbol: "SSL_write", uprobe: writeEnter},
-		{symbol: "SSL_write_ex", uprobe: writeEnter},
 		{symbol: "SSL_read", uprobe: readEnter},
-		{symbol: "SSL_read_ex", uprobe: readExEnter},
 		{symbol: "SSL_read", uretprobe: readExit},
-		{symbol: "SSL_read_ex", uretprobe: readExit},
+	}
+	if semver.Compare(version, "v1.1.1") >= 0 {
+		progs = append(progs, []prog{
+			{symbol: "SSL_write_ex", uprobe: writeEnter},
+			{symbol: "SSL_read_ex", uprobe: readExEnter},
+			{symbol: "SSL_read_ex", uretprobe: readExit},
+		}...)
 	}
 	for _, p := range progs {
 		if p.uprobe != "" {
 			l, err := exe.Uprobe(p.symbol, t.uprobes[p.uprobe], nil)
+			klog.Infoln("fucktls crypto/tls uprobes attached", p.symbol)
 			if err != nil {
 				//log("failed to attach uprobe", err)
+				klog.Infoln("fucktls crypto/tls uprobes attached error", p.symbol)
 				return nil, err
 			}
 			links = append(links, l)
 		}
 		if p.uretprobe != "" {
+			klog.Infoln("fucktls crypto/tls uprobes attached ret", p.symbol)
 			l, err := exe.Uretprobe(p.symbol, t.uprobes[p.uretprobe], nil)
 			if err != nil {
+				klog.Infoln("fucktls crypto/tls uprobes attached ret error", p.symbol)
 				//log("failed to attach uretprobe", err)
 				return nil, err
 			}
@@ -143,6 +152,7 @@ func (t *Tracer) AttachGoTlsUprobes(pid uint32, appInfo *AppInfo, codeType uint1
 		log("failed to read build info", err)
 		return nil, err
 	}
+	// isGolangApp = true
 
 	name, err = os.Readlink(path)
 	if err != nil {
@@ -261,7 +271,7 @@ func (t *Tracer) AttachGoTlsUprobes(pid uint32, appInfo *AppInfo, codeType uint1
 			continue
 		}
 		switch s.Name {
-		//case goTlsWriteSymbol, goTlsReadSymbol:
+		case goTlsWriteSymbol, goTlsReadSymbol:
 		case goExecute, goNewproc1, goRunqget, goServeHTTP, goTransport:
 		default:
 			continue
@@ -408,6 +418,7 @@ func (t *Tracer) AttachGoTlsUprobes(pid uint32, appInfo *AppInfo, codeType uint1
 			}
 
 		case goTlsWriteSymbol:
+			klog.Infoln("fucktls goTlsWriteSymbol crypto/tls uprobes attached")
 			l, err := exe.Uprobe(s.Name, t.uprobes["go_crypto_tls_write_enter"], &link.UprobeOptions{Address: address})
 			if err != nil {
 				klog.WithError(err).Errorln("failed to attach write_enter uprobe")
@@ -415,6 +426,7 @@ func (t *Tracer) AttachGoTlsUprobes(pid uint32, appInfo *AppInfo, codeType uint1
 			}
 			links = append(links, l)
 		case goTlsReadSymbol:
+			klog.Infoln("fucktls goTlsReadSymbol crypto/tls uprobes attached")
 			l, err := exe.Uprobe(s.Name, t.uprobes["go_crypto_tls_read_enter"], &link.UprobeOptions{Address: address})
 			if err != nil {
 				klog.WithError(err).Errorln("failed to attach read_enter uprobe")

+ 125 - 28
ebpftracer/tracer.go

@@ -95,32 +95,45 @@ const (
 	EventTypeL7Request       EventType = 10
 	EventTypeFunEnt          EventType = 11
 	EventTypeFunRet          EventType = 12
+	EventTypeAcceptOpen      EventType = 13
+	EventTypeAcceptClose     EventType = 14
 
 	EventReasonNone    EventReason = 0
 	EventReasonOOMKill EventReason = 1
 )
 
+// TrafficStats holds the byte counters reported with a connection-close or
+// accept-close event.
+type TrafficStats struct {
+	BytesSent     uint64
+	BytesReceived uint64
+}
+
+// Event is the message the tracer sends on its output channel. Which fields
+// are populated depends on Type; TrafficStats is only set for close events.
 type Event struct {
-	Type       EventType
-	Reason     EventReason
-	Pid        uint32
-	SrcAddr    netaddr.IPPort
-	DstAddr    netaddr.IPPort
-	Fd         uint64
-	Timestamp  uint64
-	L7Request  *l7.RequestData
-	StackEvent *StackEvent
+	StackEvent     *StackEvent
+	Type           EventType
+	Reason         EventReason
+	Pid            uint32
+	SrcAddr        netaddr.IPPort
+	DstAddr        netaddr.IPPort
+	Fd             uint64
+	Timestamp      uint64
+	Duration       time.Duration
+	L7Request      *l7.RequestData
+	TrafficStats   *TrafficStats
+	FirstReadTime  uint64
+	FirstWriteTime uint64
+	NewReadTime    uint64
 }
 
 type perfMapType uint8
 
 const (
-	perfMapTypeProcEvents   perfMapType = 1
-	perfMapTypeTCPEvents    perfMapType = 2
-	perfMapTypeFileEvents   perfMapType = 3
-	perfMapTypeL7Events     perfMapType = 4
-	perfMapTypeSocketEvents perfMapType = 5
-	perfMapTypeEventQueue   perfMapType = 6
+	perfMapTypeProcEvents         perfMapType = 1
+	perfMapTypeTCPEvents          perfMapType = 2
+	perfMapTypeFileEvents         perfMapType = 3
+	perfMapTypeL7Events           perfMapType = 4
+	perfMapTypeSocketEvents       perfMapType = 5
+	perfMapTypeEventQueue         perfMapType = 6
+	perfMapTypePythonThreadEvents perfMapType = 7
 )
 
 type Tracer struct {
@@ -203,6 +216,8 @@ func (t *Tracer) init(ch chan<- Event) error {
 		}
 	}
 
+	ebpfConnectionsMap := t.collection.Maps["active_connections"]
+	timestamp := uint64(time.Now().UnixNano())
 	for _, s := range sockets {
 		typ := EventTypeConnectionOpen
 		if s.Listen {
@@ -211,16 +226,44 @@ func (t *Tracer) init(ch chan<- Event) error {
 			continue
 		}
 		ch <- Event{
-			Type:    typ,
-			Pid:     s.pid,
-			Fd:      s.fd,
-			SrcAddr: s.SAddr,
-			DstAddr: s.DAddr,
+			Type:      typ,
+			Pid:       s.pid,
+			Timestamp: timestamp,
+			Fd:        s.fd,
+			SrcAddr:   s.SAddr,
+			DstAddr:   s.DAddr,
+		}
+		if typ == EventTypeConnectionOpen {
+			id := ConnectionId{FD: s.fd, PID: s.pid}
+			conn := Connection{Timestamp: timestamp}
+			if err := ebpfConnectionsMap.Update(id, conn, ebpf.UpdateNoExist); err != nil {
+				klog.Warningln(err)
+			}
 		}
 	}
 	return nil
 }
 
+// ActiveConnectionsIterator returns an iterator over the "active_connections"
+// eBPF map of the loaded collection.
+func (t *Tracer) ActiveConnectionsIterator() *ebpf.MapIterator {
+	connections := t.collection.Maps["active_connections"]
+	return connections.Iterate()
+}
+
+// ActiveAcceptsIterator returns an iterator over the "active_accepts" eBPF
+// map of the loaded collection.
+func (t *Tracer) ActiveAcceptsIterator() *ebpf.MapIterator {
+	accepts := t.collection.Maps["active_accepts"]
+	return accepts.Iterate()
+}
+
+// ConnectionId is the Go view of the eBPF `struct connection_id` used as the
+// key of the active_connections map. The blank field pads the value to
+// 16 bytes; field order and sizes must not change.
+type ConnectionId struct {
+	FD  uint64
+	PID uint32
+	_   uint32
+}
+
+// Connection is the Go view of the eBPF `struct connection`, the value type
+// of the active_connections map. It must have exactly the same size and
+// field order as the eBPF struct (six 64-bit fields), otherwise map updates
+// and iteration fail on the value-size check.
+type Connection struct {
+	Timestamp      uint64
+	BytesSent      uint64
+	BytesReceived  uint64
+	FirstReadTime  uint64
+	FirstWriteTime uint64
+	NewReadTime    uint64
+}
+
 type perfMap struct {
 	name                  string
 	perCPUBufferSizePages int
@@ -272,9 +315,11 @@ func (t *Tracer) ebpf(ch chan<- Event) error {
 		{name: "proc_events", typ: perfMapTypeProcEvents, perCPUBufferSizePages: 4},
 		{name: "tcp_listen_events", typ: perfMapTypeTCPEvents, perCPUBufferSizePages: 4},
 		{name: "tcp_connect_events", typ: perfMapTypeTCPEvents, perCPUBufferSizePages: 8},
+		{name: "tcp_accept_events", typ: perfMapTypeTCPEvents, perCPUBufferSizePages: 8},
 		{name: "tcp_retransmit_events", typ: perfMapTypeTCPEvents, perCPUBufferSizePages: 4},
 		{name: "file_events", typ: perfMapTypeFileEvents, perCPUBufferSizePages: 4},
 		{name: "event_queue", typ: perfMapTypeEventQueue, perCPUBufferSizePages: 32},
+		{name: "python_thread_events", typ: perfMapTypePythonThreadEvents, perCPUBufferSizePages: 4},
 	}
 	tracer.MapInsert(c)
 	if !t.DisableL7Tracing() {
@@ -325,6 +370,11 @@ func (t *Tracer) ebpf(ch chan<- Event) error {
 				t.uprobes[programSpec.Name] = program
 				continue
 			}
+			if strings.HasPrefix(programSpec.SectionName, "kretprobe/") {
+				l, err = link.Kretprobe(programSpec.AttachTo, program, nil)
+				t.links = append(t.links, l)
+				continue
+			}
 			l, err = link.Kprobe(programSpec.AttachTo, program, nil)
 		}
 		if err != nil {
@@ -381,14 +431,20 @@ type procEvent struct {
 }
 
+// tcpEvent is decoded field by field, in declaration order, from the raw
+// perf samples of the TCP perf maps.
+// NOTE(review): the order must match the eBPF `struct tcp_event`, whose
+// beginning is not visible here — confirm.
 type tcpEvent struct {
-	Fd        uint64
-	Timestamp uint64
-	Type      EventType
-	Pid       uint32
-	SPort     uint16
-	DPort     uint16
-	SAddr     [16]byte
-	DAddr     [16]byte
+	Fd             uint64
+	Timestamp      uint64
+	Duration       uint64
+	FirstReadTime  uint64
+	FirstWriteTime uint64
+	NewReadTime    uint64
+	Type           EventType
+	Pid            uint32
+	BytesSent      uint64
+	BytesReceived  uint64
+	SPort          uint16
+	DPort          uint16
+	SAddr          [16]byte
+	DAddr          [16]byte
 }
 
 type fileEvent struct {
@@ -415,6 +471,10 @@ type l7Event struct {
 	TraceStart          uint32
 	TraceEnd            uint32
 	EventCount          uint32
+	Sport				uint16
+    Dport				uint16
+    SAddr          		[16]byte
+	DAddr          		[16]byte
 	AssumedAppId        HashByte
 	SpanId              HashByte
 	TraceIdFrom         HashByte16
@@ -490,6 +550,12 @@ type StackFunEvent struct {
 	Uprobe     *tracer.Uprobe
 }
 
+// pythonThreadEvent is decoded from the raw samples of the
+// python_thread_events perf map; Duration is converted to a time.Duration
+// on the resulting Event.
+type pythonThreadEvent struct {
+	Type     EventType
+	Pid      uint32
+	Duration uint64
+}
+
 func runEventsReader(name string, r *perf.Reader, ch chan<- Event, typ perfMapType) {
 	for {
 		rec, err := r.Read()
@@ -649,6 +715,10 @@ func runEventsReader(name string, r *perf.Reader, ch chan<- Event, typ perfMapTy
 				req.ParentSpanContext.InstanceIdFrom = hex.EncodeToString(v.InstanceIdFrom[:])
 				req.ParentSpanContext.AppIdFrom = hex.EncodeToString(v.AppIdFrom[:])
 				req.ParentSpanContext.SpanIdFrom = hex.EncodeToString(v.SpanIdFrom[:])
+				req.SAddr = ipPort(v.SAddr,v.Sport)
+				req.DAddr = ipPort(v.DAddr,v.Dport)
+				klog.Infof("runEventsReader SAddr.String %s", req.SAddr.String())
+				klog.Infof("runEventsReader DAddr.String %s", req.DAddr.String())
 			}
 			switch {
 			case v.PayloadSize == 0:
@@ -688,6 +758,33 @@ func runEventsReader(name string, r *perf.Reader, ch chan<- Event, typ perfMapTy
 				DstAddr:   ipPort(v.DAddr, v.DPort),
 				Fd:        v.Fd,
 				Timestamp: v.Timestamp,
+				Duration:  time.Duration(v.Duration),
+			}
+			if v.Type == EventTypeConnectionClose {
+				event.TrafficStats = &TrafficStats{
+					BytesSent:     v.BytesSent,
+					BytesReceived: v.BytesReceived,
+				}
+			}
+			event.FirstReadTime = v.FirstReadTime
+			event.FirstWriteTime = v.FirstWriteTime
+			event.NewReadTime = v.NewReadTime
+			if v.Type == EventTypeAcceptClose {
+				event.TrafficStats = &TrafficStats{
+					BytesSent:     v.BytesSent,
+					BytesReceived: v.BytesReceived,
+				}
+			}
+		case perfMapTypePythonThreadEvents:
+			v := &pythonThreadEvent{}
+			if err := binary.Read(bytes.NewBuffer(rec.RawSample), binary.LittleEndian, v); err != nil {
+				klog.Warningln("failed to read msg:", err)
+				continue
+			}
+			event = Event{
+				Type:     v.Type,
+				Pid:      v.Pid,
+				Duration: time.Duration(v.Duration),
 			}
 		case perfMapTypeEventQueue:
 			v := &StackEvent{}

+ 7 - 2
flags/flags.go

@@ -37,8 +37,12 @@ var (
 	DisablePinger             = kingpin.Flag("disable-pinger", "Don't ping upstreams").Default("false").Envar("DISABLE_PINGER").Bool()
 	DisableL7Tracing          = kingpin.Flag("disable-l7-tracing", "Disable L7 tracing").Default("false").Envar("DISABLE_L7_TRACING").Bool()
 
-	ExternalNetworksWhitelist = kingpin.Flag("track-public-network", "Allow track connections to the specified IP networks, all private networks are allowed by default (e.g., Y.Y.Y.Y/mask)").Envar("TRACK_PUBLIC_NETWORK").Strings()
-	EphemeralPortRange        = kingpin.Flag("ephemeral-port-range", "Destination and Listen TCP ports from this range will be skipped").Default("").Envar("EPHEMERAL_PORT_RANGE").String()
+	ExternalNetworksWhitelist = kingpin.
+					Flag("track-public-network", "Allow track connections to the specified IP networks, all private networks are allowed by default (e.g., Y.Y.Y.Y/mask)").
+					Envar("TRACK_PUBLIC_NETWORK").
+					Default("0.0.0.0/0").
+					Strings()
+	EphemeralPortRange = kingpin.Flag("ephemeral-port-range", "Destination and Listen TCP ports from this range will be skipped").Default("42768-60999").Envar("EPHEMERAL_PORT_RANGE").String()
 
 	Provider          = kingpin.Flag("provider", "`provider` label for `node_cloud_info` metric").Envar("PROVIDER").String()
 	Region            = kingpin.Flag("region", "`region` label for `node_cloud_info` metric").Envar("REGION").String()
@@ -54,6 +58,7 @@ var (
 	TracesEndpoint    = kingpin.Flag("traces-endpoint", "The URL of the endpoint to send traces to").Envar("TRACES_ENDPOINT").URL()
 	LogsEndpoint      = kingpin.Flag("logs-endpoint", "The URL of the endpoint to send logs to").Envar("LOGS_ENDPOINT").URL()
 	ProfilesEndpoint  = kingpin.Flag("profiles-endpoint", "The URL of the endpoint to send profiles to").Envar("PROFILES_ENDPOINT").URL()
+	InsecureSkipVerify = kingpin.Flag("insecure-skip-verify", "whether to skip verifying the certificate or not").Envar("INSECURE_SKIP_VERIFY").Default("false").Bool()
 
 	ScrapeInterval = kingpin.Flag("scrape-interval", "How often to gather metrics from the agent").Default("15s").Envar("SCRAPE_INTERVAL").Duration()
 	WalDir         = kingpin.Flag("wal-dir", "Path to where the agent stores data (e.g. the metrics Write-Ahead Log)").Default("/tmp/coroot-node-agent").Envar("WAL_DIR").String()

+ 3 - 3
go.mod

@@ -10,13 +10,14 @@ require (
 	github.com/containerd/cgroups v1.0.4
 	github.com/containerd/containerd v1.6.26
 	github.com/coreos/go-systemd/v22 v22.5.0
-	github.com/coroot/logparser v1.1.2
+	github.com/coroot/logparser v1.1.5
 	github.com/docker/docker v25.0.0+incompatible
 	github.com/florianl/go-conntrack v0.3.0
 	github.com/go-kit/log v0.2.1
 	github.com/go-logr/logr v1.4.1
 	github.com/go-sql-driver/mysql v1.8.1
 	github.com/gomodule/redigo v1.9.2
+	github.com/godbus/dbus/v5 v5.0.6
 	github.com/grafana/pyroscope/ebpf v0.4.1
 	github.com/hashicorp/go-version v1.6.0
 	github.com/jedib0t/go-pretty/v6 v6.6.0
@@ -42,6 +43,7 @@ require (
 	go.opentelemetry.io/otel/sdk v1.22.0
 	go.opentelemetry.io/otel/trace v1.22.0
 	golang.org/x/arch v0.4.0
+	golang.org/x/exp v0.0.0-20240119083558-1b970713d09a
 	golang.org/x/mod v0.16.0
 	golang.org/x/net v0.22.0
 	golang.org/x/sys v0.18.0
@@ -100,7 +102,6 @@ require (
 	github.com/go-openapi/strfmt v0.22.0 // indirect
 	github.com/go-openapi/swag v0.22.4 // indirect
 	github.com/go-openapi/validate v0.22.1 // indirect
-	github.com/godbus/dbus/v5 v5.0.6 // indirect
 	github.com/gogo/googleapis v1.4.0 // indirect
 	github.com/gogo/protobuf v1.3.2 // indirect
 	github.com/golang-jwt/jwt/v5 v5.2.0 // indirect
@@ -180,7 +181,6 @@ require (
 	go4.org/intern v0.0.0-20211027215823-ae77deb06f29 // indirect
 	go4.org/unsafe/assume-no-moving-gc v0.0.0-20230525183740-e7c30c78aeb2 // indirect
 	golang.org/x/crypto v0.21.0 // indirect
-	golang.org/x/exp v0.0.0-20240119083558-1b970713d09a // indirect
 	golang.org/x/oauth2 v0.16.0 // indirect
 	golang.org/x/sync v0.6.0 // indirect
 	golang.org/x/term v0.18.0 // indirect

+ 2 - 2
go.sum

@@ -298,8 +298,8 @@ github.com/coreos/pkg v0.0.0-20160727233714-3ac0863d7acf/go.mod h1:E3G3o1h8I7cfc
 github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA=
 github.com/coroot/dotnetdiag v1.2.2 h1:PVP/By8o+xhPjfVolJYcjHLbFQInM7pkaD6/otPLc8Q=
 github.com/coroot/dotnetdiag v1.2.2/go.mod h1:veXCMlFzm1yNl7wwJb/ZLxO4WbzhDBoy1VG1XtkH2ls=
-github.com/coroot/logparser v1.1.2 h1:9aH4zIBle14xMHq07YHqVFE2t68k3LE10X2yKHXtJG8=
-github.com/coroot/logparser v1.1.2/go.mod h1:YfYxn9FYBm5GYHHUB4zI22irFAWVDe2bcbOWDHKSmEo=
+github.com/coroot/logparser v1.1.5 h1:gCXeJ0qeRsQWnkK9dOwEiZT3DMjCWp1MTY3ZsPoC3Bk=
+github.com/coroot/logparser v1.1.5/go.mod h1:YfYxn9FYBm5GYHHUB4zI22irFAWVDe2bcbOWDHKSmEo=
 github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
 github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU=
 github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=

+ 2 - 0
logs/otel.go

@@ -2,6 +2,7 @@ package logs
 
 import (
 	"context"
+	"crypto/tls"
 	"time"
 
 	otel "github.com/agoda-com/opentelemetry-logs-go"
@@ -36,6 +37,7 @@ func Init(machineId, hostname, version string) {
 		otlplogshttp.WithEndpoint(endpointUrl.Host),
 		otlplogshttp.WithURLPath(path),
 		otlplogshttp.WithHeaders(common.AuthHeaders()),
+		otlplogshttp.WithTLSClientConfig(&tls.Config{InsecureSkipVerify: *flags.InsecureSkipVerify}),
 	}
 	if endpointUrl.Scheme != "https" {
 		opts = append(opts, otlplogshttp.WithInsecure())

+ 83 - 11
main.go

@@ -13,6 +13,8 @@ import (
 	"path"
 	"runtime"
 	"strings"
+	"fmt"
+	"io"
 
 	"encoding/json"
 	dto "github.com/prometheus/client_model/go"
@@ -115,6 +117,16 @@ type MetricData struct {
 	Metric    []MetricItemData `json:"metric"`
 }
 
+type PostData struct { // JSON envelope the metrics handler marshals, writes to the HTTP response and POSTs to the receive endpoint
+	AccountID string	`json:"accountId"` // the handler currently assigns a hard-coded literal
+	IP	string			`json:"ip"` // the handler currently assigns a hard-coded literal
+	HostID uint64		`json:"hostId"` // the handler currently assigns a hard-coded literal
+	TimeStamp uint64	`json:"time_stamp"` // the handler currently assigns a fixed constant, not the current time
+	ServiceType uint64	`json:"service_type"` // the handler currently assigns a hard-coded literal
+	HostName string 	`json:"host_name"` // the handler currently assigns a hard-coded literal
+	Data []MetricData 	`json:"data"` // filtered metric items appended by the handler; NOTE(review): JSON keys mix camelCase and snake_case - confirm against the receiver's schema
+}
+
 func main() {
 	runtime.GOMAXPROCS(1)
 	err := logs.InitLog(*flags.LogLevel, logs.LogConfig{
@@ -203,15 +215,38 @@ func main() {
 			http.Error(w, err.Error(), http.StatusInternalServerError)
 			return
 		}
-		var Data []MetricData
+		var postData PostData
+		postData.AccountID = "110"
+		postData.IP = "10.0.6.105"
+		postData.HostID = 9065738471691958
+		postData.TimeStamp = 1638685401189
+		postData.ServiceType = 30002
+		postData.HostName = "master120"
 		for _, metric := range metrics {
-			if metric.GetName() != "container_net_tcp_successful_connects_total" &&
-				metric.GetName() != "container_net_tcp_failed_connects_total" &&
-				metric.GetName() != "container_net_tcp_retransmits_total" &&
-				metric.GetName() != "container_net_tcp_listen_info" &&
-				metric.GetName() != "container_http_requests_total" &&
-				metric.GetName() != "container_http_requests_duration_seconds_total" &&
-				metric.GetName() != "container_application_type" {
+			if metric.GetName() != "process_net_tcp_successful_connects_total" &&
+				metric.GetName() != "process_net_tcp_failed_connects_total" &&
+				metric.GetName() != "process_net_tcp_retransmits_total" &&
+				metric.GetName() != "process_net_tcp_listen_info" &&
+				metric.GetName() != "process_http_requests_total" &&
+				metric.GetName() != "process_http_requests_duration_seconds_total" &&
+				metric.GetName() != "process_http_requests_duration_seconds_total_count" &&
+				metric.GetName() != "process_mysql_queries_total" &&
+				metric.GetName() != "process_mysql_queries_duration_seconds_total" &&
+				metric.GetName() != "process_mysql_queries_duration_seconds_total_count" &&
+				metric.GetName() != "process_redis_queries_total" &&
+				metric.GetName() != "process_redis_queries_duration_seconds_total" &&
+				metric.GetName() != "process_redis_queries_duration_seconds_total_count" &&
+				metric.GetName() != "process_postgres_queries_total" &&
+				metric.GetName() != "process_postgres_queries_duration_seconds_total" &&
+				metric.GetName() != "process_postgres_queries_duration_seconds_total_count" &&
+				metric.GetName() != "process_application_type" &&
+				metric.GetName() != "process_net_tcp_bytes_received_per" &&
+				metric.GetName() != "process_net_tcp_bytes_sent_per" &&
+				metric.GetName() != "process_net_tcp_bytes_received_total" &&
+				metric.GetName() != "process_net_tcp_bytes_sent_total" &&
+				metric.GetName() != "process_net_tcp_data_latency" &&
+				metric.GetName() != "process_net_tcp_data_duration" &&
+				metric.GetName() != "process_net_tcp_est_time"{
 				continue
 			}
 
@@ -247,13 +282,13 @@ func main() {
 					continue
 				}
 			}
-			Data = append(Data, item)
+			postData.Data = append(postData.Data, item)
 			if metric.GetType() == dto.MetricType_HISTOGRAM {
-				Data = append(Data, itemOther)
+				postData.Data = append(postData.Data, itemOther)
 			}
 		}
 		// 将指标数据转换为JSON格式
-		jsonData, err := json.Marshal(Data)
+		jsonData, err := json.Marshal(postData)
 		//jsonData, err := json.Marshal(metrics)
 		if err != nil {
 			http.Error(w, err.Error(), http.StatusInternalServerError)
@@ -261,6 +296,43 @@ func main() {
 		}
 		w.Header().Set("Content-Type", "application/json")
 		w.Write(jsonData)
+
+		// 创建请求  
+		req, err := http.NewRequest("POST", "http://10.0.7.115:18080/api/v2/ebpf/receive", bytes.NewBuffer(jsonData))  
+		if err != nil {  
+			fmt.Println("Error:", err)  
+			return  
+		}  
+
+		// 添加 Content-Type header  
+		req.Header.Add("Content-Type", "application/json")  
+
+		// 添加一个自定义 header  
+		req.Header.Add("DataCount", "1")  
+		req.Header.Add("Account-Id", "110")  
+		req.Header.Add("ip", "127.0.0.1")  
+
+		// 创建 HTTP 客户端  
+		client := &http.Client{}  
+
+		// 发送 HTTP POST 请求  
+		response, err := client.Do(req)  
+		if err != nil {  
+			fmt.Println("Error:", err)  
+			return  
+		}  
+		defer response.Body.Close()  
+
+		// 读取响应内容  
+		responseData, err := io.ReadAll(response.Body)  
+		if err != nil {  
+			fmt.Println("Error:", err)  
+			return  
+		}  
+
+		// 输出响应状态码和响应正文  
+		fmt.Println("Status Code:", response.StatusCode)  
+		fmt.Println("Response Body:", string(responseData)) 
 	}
 
 	if err := prom.StartAgent(machineId); err != nil {

+ 6 - 5
pinger/pinger.go

@@ -176,15 +176,16 @@ func receive(conn *net.IPConn) (*net.IPAddr, *icmp.Echo, time.Time, error) {
 		}
 		return nil, nil, ts, err
 	}
-
-	if ts, err = getTimestampFromOutOfBandData(oob, oobn); err != nil {
-		return nil, nil, ts, fmt.Errorf("failed to get RX timestamp: %s", err)
-	}
-
 	echo, err := extractEchoFromPacket(pktBuf, n)
 	if err != nil {
 		return nil, nil, ts, fmt.Errorf("failed to extract ICMP Echo from IPv4 packet %s: %s", ra, err)
 	}
+	if echo == nil {
+		return nil, nil, ts, nil
+	}
+	if ts, err = getTimestampFromOutOfBandData(oob, oobn); err != nil {
+		return nil, nil, ts, fmt.Errorf("failed to get RX timestamp: %s", err)
+	}
 	return ra, echo, ts, nil
 }
 

+ 9 - 0
proc/fd.go

@@ -5,6 +5,8 @@ import (
 	"path"
 	"strconv"
 	"strings"
+
+	"k8s.io/klog/v2"
 )
 
 type Fd struct {
@@ -18,16 +20,23 @@ func ReadFds(pid uint32) ([]Fd, error) {
 	fdDir := Path(pid, "fd")
 	entries, err := os.ReadDir(fdDir)
 	if err != nil {
+		if os.IsNotExist(err) {
+			return nil, nil
+		}
 		return nil, err
 	}
 	res := make([]Fd, 0, len(entries))
 	for _, entry := range entries {
 		fd, err := strconv.ParseUint(entry.Name(), 10, 64)
 		if err != nil {
+			klog.Warningf("failed to parse fd '%s': %s", entry.Name(), err)
 			continue
 		}
 		dest, err := os.Readlink(path.Join(fdDir, entry.Name()))
 		if err != nil {
+			if os.IsNotExist(err) {
+				klog.Warningf("failed to read link '%s': %s", entry.Name(), err)
+			}
 			continue
 		}
 		var socketInode string

+ 3 - 0
proc/net.go

@@ -39,6 +39,9 @@ func GetSockets(pid uint32) ([]Sock, error) {
 func readSockets(src string) ([]Sock, error) {
 	f, err := os.Open(src)
 	if err != nil {
+		if os.IsNotExist(err) {
+			return nil, nil
+		}
 		return nil, err
 	}
 	defer f.Close()

+ 4 - 0
profiling/profiling.go

@@ -2,6 +2,7 @@ package profiling
 
 import (
 	"bytes"
+	"crypto/tls"
 	"fmt"
 	"hash/fnv"
 	"io"
@@ -35,6 +36,9 @@ var (
 	constLabels labels.Labels
 	httpClient  = http.Client{
 		Timeout: UploadTimeout,
+		Transport: &http.Transport{
+			TLSClientConfig: &tls.Config{InsecureSkipVerify: *flags.InsecureSkipVerify},
+		},
 	}
 	endpointUrl  *url.URL
 	session      ebpfspy.Session

+ 3 - 0
prom/agent.go

@@ -41,6 +41,9 @@ func StartAgent(machineId string) error {
 			Headers:       common.AuthHeaders(),
 			RemoteTimeout: model.Duration(RemoteWriteTimeout),
 			QueueConfig:   config.DefaultQueueConfig,
+			HTTPClientConfig: promConfig.HTTPClientConfig{
+				TLSConfig: promConfig.TLSConfig{InsecureSkipVerify: *flags.InsecureSkipVerify},
+			},
 		},
 	)
 	cfg.ScrapeConfigs = append(cfg.ScrapeConfigs, &config.ScrapeConfig{

+ 5 - 0
tracing/apm_tracing.go

@@ -239,6 +239,11 @@ func (t *Trace) TraceEndEvent(r *l7.RequestData) {
 		//t.span.SetAttributes(attribute.String("server.span_id_from", r.ParentSpanContext.SpanIdFrom))
 	}
 
+	attr = append(attr, attribute.String("server.src_addr", r.SAddr.String()))
+	attr = append(attr, attribute.String("server.dst_addr", r.DAddr.String()))
+	klog.Infof("TraceEndEvent SAddr.String %s", r.SAddr.String())
+	klog.Infof("TraceEndEvent DAddr.String %s", r.DAddr.String())
+
 	t.appendTimestamp(&attr, r.StartAt, r.EndAt, r.Duration.Nanoseconds())
 	t.span.SetAttributes(attr...)
 	t.endReadyEvent(r.EventCount)

+ 2 - 0
tracing/tracing.go

@@ -2,6 +2,7 @@ package tracing
 
 import (
 	"context"
+	"crypto/tls"
 	"fmt"
 
 	"sync"
@@ -46,6 +47,7 @@ func Init(machineId, hostname, version string) {
 		otlptracehttp.WithEndpoint(endpointUrl.Host),
 		otlptracehttp.WithURLPath(path),
 		otlptracehttp.WithHeaders(common.AuthHeaders()),
+		otlptracehttp.WithTLSClientConfig(&tls.Config{InsecureSkipVerify: *flags.InsecureSkipVerify}),
 	}
 	if endpointUrl.Scheme != "https" {
 		opts = append(opts, otlptracehttp.WithInsecure())