Răsfoiți Sursa

Fixed #TASK_QT-9810 修改bug。

rock 1 an în urmă
părinte
comite
771ee590db
3 a modificat fișierele cu 104 adăugiri și 135 ștergeri
  1. 7 8
      containers/container.go
  2. 93 111
      containers/registry.go
  3. 4 16
      main.go

+ 7 - 8
containers/container.go

@@ -1066,11 +1066,6 @@ func (c *Container) gc(now time.Time) {
 		seenNamespaces[p.NetNsId()] = true
 		seenNamespaces[p.NetNsId()] = true
 	}
 	}
 
 
-	for ns := range c.ipsByNs {
-		if !seenNamespaces[ns] {
-			delete(c.ipsByNs, ns)
-		}
-	}
 	c.revalidateListens(now, listens)
 	c.revalidateListens(now, listens)
 	for srcDst, conn := range c.connectionsActive {
 	for srcDst, conn := range c.connectionsActive {
 		pidFd := PidFd{Pid: conn.Pid, Fd: conn.Fd}
 		pidFd := PidFd{Pid: conn.Pid, Fd: conn.Fd}
@@ -1233,11 +1228,15 @@ func (c *Container) attachTlsUprobes(tracer *ebpftracer.Tracer, pid uint32) erro
 		p.openSslUprobesChecked = true
 		p.openSslUprobesChecked = true
 	}
 	}
 	if !p.goTlsUprobesChecked {
 	if !p.goTlsUprobesChecked {
-		uprobes, isGolangApp := tracer.AttachGoTlsUprobes(pid)
-		p.isGolangApp = isGolangApp
-		p.uprobes = append(p.uprobes, uprobes...)
+		codeType := c.GetCodeTypeFromCache(pid)
+		goProbes, err := tracer.AttachGoTlsUprobes(pid, &c.AppInfo, uint16(codeType))
+		if err != nil {
+			return err
+		}
+		p.uprobes = append(p.uprobes, goProbes...)
 		p.goTlsUprobesChecked = true
 		p.goTlsUprobesChecked = true
 	}
 	}
+
 	return nil
 	return nil
 }
 }
 
 

+ 93 - 111
containers/registry.go

@@ -8,7 +8,6 @@ import (
 	. "github.com/coroot/coroot-node-agent/utils/modelse"
 	. "github.com/coroot/coroot-node-agent/utils/modelse"
 	"github.com/coroot/coroot-node-agent/utils/try"
 	"github.com/coroot/coroot-node-agent/utils/try"
 	. "github.com/coroot/coroot-node-agent/utils/worker"
 	. "github.com/coroot/coroot-node-agent/utils/worker"
-	log "github.com/sirupsen/logrus"
 	"os"
 	"os"
 	"regexp"
 	"regexp"
 	"strconv"
 	"strconv"
@@ -138,15 +137,15 @@ func NewRegistry(reg prometheus.Registerer, kernelVersion string, processInfoCh
 	if clientErr == nil {
 	if clientErr == nil {
 		// 负载健康检测
 		// 负载健康检测
 		try.Go(proxyClient.CheckEndpoints, CatchFn)
 		try.Go(proxyClient.CheckEndpoints, CatchFn)
-		log.Infof("New Proxy Client success.config_server is [%s]", "")
+		klog.Infof("New Proxy Client success.config_server is [%s]", "")
 	} else {
 	} else {
-		log.WithError(clientErr).Errorf("NewProxyClient error, Please check [export CONFIG_ENDPOINT=ip:port]")
+		klog.WithError(clientErr).Errorf("NewProxyClient error, Please check [export CONFIG_ENDPOINT=ip:port]")
 		return nil, clientErr
 		return nil, clientErr
 	}
 	}
 
 
 	r.connServer, err = NewServerHTTPWorker()
 	r.connServer, err = NewServerHTTPWorker()
 	if err != nil {
 	if err != nil {
-		log.Errorf("init connServer error:%s.", err)
+		klog.Errorf("init connServer error:%s.", err)
 		return nil, err
 		return nil, err
 	}
 	}
 
 
@@ -193,7 +192,7 @@ func (r *Registry) handleEvents(ch <-chan ebpftracer.Event) {
 		case now := <-gcTicker.C:
 		case now := <-gcTicker.C:
 			_, err := r.getWhiteList()
 			_, err := r.getWhiteList()
 			if err != nil {
 			if err != nil {
-				log.WithError(err).Errorf("connWhiteList error")
+				klog.WithError(err).Errorf("connWhiteList error")
 			}
 			}
 			runtimeApps := make(map[uint32]AppStatusInfo)
 			runtimeApps := make(map[uint32]AppStatusInfo)
 			for pid, c := range r.containersByPid {
 			for pid, c := range r.containersByPid {
@@ -368,7 +367,7 @@ func (r *Registry) handleEvents(ch <-chan ebpftracer.Event) {
 			case ebpftracer.EventTypeConnectionOpen:
 			case ebpftracer.EventTypeConnectionOpen:
 				//fmt.Println("ebpftracer.EventTypeConnectionOpen==================", e.Pid)
 				//fmt.Println("ebpftracer.EventTypeConnectionOpen==================", e.Pid)
 				if c := r.getOrCreateContainer(e.Pid); c != nil {
 				if c := r.getOrCreateContainer(e.Pid); c != nil {
-					c.onConnectionOpen(e.Pid, e.Fd, e.SrcAddr, e.DstAddr, e.Timestamp, false)
+					c.onConnectionOpen(e.Pid, e.Fd, e.SrcAddr, e.DstAddr, e.Timestamp, false, e.Duration)
 					c.eventReady()
 					c.eventReady()
 					if common.IsOpenFilter() && common.IsFilterPid(e.Pid) {
 					if common.IsOpenFilter() && common.IsFilterPid(e.Pid) {
 						c.WhiteSettingInfo.AppName = enums.TestApp
 						c.WhiteSettingInfo.AppName = enums.TestApp
@@ -390,24 +389,12 @@ func (r *Registry) handleEvents(ch <-chan ebpftracer.Event) {
 				if c := r.containersByPid[e.Pid]; c != nil {
 				if c := r.containersByPid[e.Pid]; c != nil {
 					c.onListenClose(e.Pid, e.SrcAddr)
 					c.onListenClose(e.Pid, e.SrcAddr)
 				}
 				}
-
-			case ebpftracer.EventTypeConnectionOpen:
-				if c := r.getOrCreateContainer(e.Pid); c != nil {
-					c.onConnectionOpen(e.Pid, e.Fd, e.SrcAddr, e.DstAddr, e.Timestamp, false, e.Duration)
-					c.attachTlsUprobes(r.tracer, e.Pid)
-				} else {
-					klog.Infoln("TCP connection from unknown container", e)
-				}
 			case ebpftracer.EventTypeConnectionError:
 			case ebpftracer.EventTypeConnectionError:
 				if c := r.getOrCreateContainer(e.Pid); c != nil {
 				if c := r.getOrCreateContainer(e.Pid); c != nil {
 					c.onConnectionOpen(e.Pid, e.Fd, e.SrcAddr, e.DstAddr, 0, true, e.Duration)
 					c.onConnectionOpen(e.Pid, e.Fd, e.SrcAddr, e.DstAddr, 0, true, e.Duration)
 				} else {
 				} else {
 					klog.Infoln("TCP connection error from unknown container", e)
 					klog.Infoln("TCP connection error from unknown container", e)
 				}
 				}
-			case ebpftracer.EventTypeConnectionClose:
-				if c := r.containersByPid[e.Pid]; c != nil {
-					c.onConnectionClose(e)
-				}
 			case ebpftracer.EventTypeTCPRetransmit:
 			case ebpftracer.EventTypeTCPRetransmit:
 				srcDst := AddrPair{src: e.SrcAddr, dst: e.DstAddr}
 				srcDst := AddrPair{src: e.SrcAddr, dst: e.DstAddr}
 				for _, c := range r.containersById {
 				for _, c := range r.containersById {
@@ -448,105 +435,100 @@ func (r *Registry) handleEvents(ch <-chan ebpftracer.Event) {
 					// fmt.Printf("e.EventTypeFunEnt ErrorError: TraceId:%d, Pid:%d, Location:%d, Goid:%d, TimeNs:%d, Ip:%X, CallerIp:%x, Bp:%x, CallerBp:%x", e.StackEvent.TraceId, e.StackEvent.Pid, e.StackEvent.Location, e.StackEvent.Goid, e.StackEvent.TimeNsStart, e.StackEvent.Ip, e.StackEvent.CallerIp, e.StackEvent.Bp, e.StackEvent.CallerBp)
 					// fmt.Printf("e.EventTypeFunEnt ErrorError: TraceId:%d, Pid:%d, Location:%d, Goid:%d, TimeNs:%d, Ip:%X, CallerIp:%x, Bp:%x, CallerBp:%x", e.StackEvent.TraceId, e.StackEvent.Pid, e.StackEvent.Location, e.StackEvent.Goid, e.StackEvent.TimeNsStart, e.StackEvent.Ip, e.StackEvent.CallerIp, e.StackEvent.Bp, e.StackEvent.CallerBp)
 					// fmt.Printf("e.EventTypeFunEnt ErrorError: TraceId:%x, FPid:%x, Nid:%x, Level:%d\n", e.StackEvent.Fpid, e.StackEvent.Nid, e.StackEvent.Level)
 					// fmt.Printf("e.EventTypeFunEnt ErrorError: TraceId:%x, FPid:%x, Nid:%x, Level:%d\n", e.StackEvent.Fpid, e.StackEvent.Nid, e.StackEvent.Level)
 				}
 				}
-			case ebpftracer.EventTypePythonThreadLock:
-				if c := r.containersByPid[e.Pid]; c != nil {
-					c.pythonThreadLockWaitTime += e.Duration
-				}
+			}
 		}
 		}
 	}
 	}
 }
 }
 
 
-// func (r *Registry) getOrCreateContainer(pid uint32) *Container {
-// 	if c, seen := r.containersByPid[pid]; c != nil {
-// 		return c
-// 	} else if seen { // ignored
-// 		return nil
-// 	}
-// 	cg, err := proc.ReadCgroup(pid)
-// 	if err != nil {
-// 		if !common.IsNotExist(err) {
-// 			klog.Warningln("failed to read proc cgroup:", err)
-// 		}
-// 		return nil
-// 	}
-// 	cgId := fmt.Sprintf("%s/%d", cg.Id, pid)
-// 	if c := r.containersByCgroupId[cgId]; c != nil {
-// 		r.containersByPid[pid] = c
-// 		return c
-// 	}
-// 	if cg.ContainerType == cgroup.ContainerTypeSandbox {
-// 		cmdline := proc.GetCmdline(pid)
-// 		parts := bytes.Split(cmdline, []byte{0})
-// 		if len(parts) > 0 {
-// 			cmd := parts[0]
-// 			lastArg := parts[len(parts)-1]
-// 			if (bytes.HasSuffix(cmd, []byte("runsc-sandbox")) || bytes.HasSuffix(cmd, []byte("runsc"))) && containerIdRegexp.Match(lastArg) {
-// 				cg.ContainerId = string(lastArg)
-// 			}
-// 		}
-// 	}
-// 	md, err := getContainerMetadata(cg)
-// 	if err != nil {
-// 		klog.Warningf("failed to get container metadata for pid %d -> %s: %s", pid, cg.Id, err)
-// 		return nil
-// 	}
-// 	// add ns/workload/podname
-// 	id, extensionTag := calcId(cg, md, pid)
-
-// 	//klog.Infof("calculated container id %d -> %s -> %s", pid, cg.Id, id)
-// 	if id == "" {
-// 		if cg.Id == "/init.scope" && pid != 1 {
-// 			klog.Infoln("ignoring without persisting", "cg", cg.Id, "pid", pid)
-// 		} else {
-// 			klog.Infoln("ignoring", "cg", cg.Id, "pid", pid)
-// 			r.containersByPid[pid] = nil
-// 		}
-// 		return nil
-// 	}
-// 	if c := r.containersById[id]; c != nil {
-// 		//klog.Warningln("id conflict:", id)
-// 		if cg.CreatedAt().After(c.cgroup.CreatedAt()) {
-// 			c.cgroup = cg
-// 			c.metadata = md
-// 			c.runLogParser("")
-// 			if c.nsConntrack != nil {
-// 				_ = c.nsConntrack.Close()
-// 				c.nsConntrack = nil
-// 			}
-// 		}
-// 		setK8sTag(c, extensionTag, pid)
-// 		r.containersByPid[pid] = c
-// 		r.containersByCgroupId[cgId] = c
-// 		return c
-// 	}
-// 	c, err := NewContainer(id, cg, md, r.hostConntrack, pid, r)
-// 	if err != nil {
-// 		klog.Warningf("failed to create container pid=%d cg=%s id=%s: %s", pid, cg.Id, id, err)
-// 		return nil
-// 	}
-
-// 	//klog.Infoln("detected a new container", "pid", pid, "cg", cg.Id, "id", id)
-// 	// add ns/workload/podname/pid/ctype
-// 	//sType := fmt.Sprintf("%d", cg.ContainerType)
-
-// 	setK8sTag(c, extensionTag, pid)
-// 	if err := prometheus.WrapRegistererWith(setLabels(string(id),
-// 		extensionTag[Namespace],
-// 		extensionTag[PodName],
-// 		extensionTag[ProcessName],
-// 		fmt.Sprintf("%d", pid)), r.reg).Register(c); err != nil {
-// 		klog.Warningln("failed to register container:", err)
-// 		return nil
-// 	}
-// 	r.containersByPid[pid] = c
-// 	r.containersByCgroupId[cgId] = c
-// 	r.containersById[id] = c
-// 	return c
-// }
 func (r *Registry) getOrCreateContainer(pid uint32) *Container {
 func (r *Registry) getOrCreateContainer(pid uint32) *Container {
-	return nil
+	if c, seen := r.containersByPid[pid]; c != nil {
+		return c
+	} else if seen { // ignored
+		return nil
+	}
+	cg, err := proc.ReadCgroup(pid)
+	if err != nil {
+		if !common.IsNotExist(err) {
+			klog.Warningln("failed to read proc cgroup:", err)
+		}
+		return nil
+	}
+	cgId := fmt.Sprintf("%s/%d", cg.Id, pid)
+	if c := r.containersByCgroupId[cgId]; c != nil {
+		r.containersByPid[pid] = c
+		return c
+	}
+	if cg.ContainerType == cgroup.ContainerTypeSandbox {
+		cmdline := proc.GetCmdline(pid)
+		parts := bytes.Split(cmdline, []byte{0})
+		if len(parts) > 0 {
+			cmd := parts[0]
+			lastArg := parts[len(parts)-1]
+			if (bytes.HasSuffix(cmd, []byte("runsc-sandbox")) || bytes.HasSuffix(cmd, []byte("runsc"))) && containerIdRegexp.Match(lastArg) {
+				cg.ContainerId = string(lastArg)
+			}
+		}
+	}
+	md, err := getContainerMetadata(cg)
+	if err != nil {
+		klog.Warningf("failed to get container metadata for pid %d -> %s: %s", pid, cg.Id, err)
+		return nil
+	}
+	// add ns/workload/podname
+	id, extensionTag := calcId(cg, md, pid)
+
+	//klog.Infof("calculated container id %d -> %s -> %s", pid, cg.Id, id)
+	if id == "" {
+		if cg.Id == "/init.scope" && pid != 1 {
+			klog.Infoln("ignoring without persisting", "cg", cg.Id, "pid", pid)
+		} else {
+			klog.Infoln("ignoring", "cg", cg.Id, "pid", pid)
+			r.containersByPid[pid] = nil
+		}
+		return nil
+	}
+	if c := r.containersById[id]; c != nil {
+		//klog.Warningln("id conflict:", id)
+		if cg.CreatedAt().After(c.cgroup.CreatedAt()) {
+			c.cgroup = cg
+			c.metadata = md
+			c.runLogParser("")
+			if c.nsConntrack != nil {
+				_ = c.nsConntrack.Close()
+				c.nsConntrack = nil
+			}
+		}
+		setK8sTag(c, extensionTag, pid)
+		r.containersByPid[pid] = c
+		r.containersByCgroupId[cgId] = c
+		return c
+	}
+	c, err := NewContainer(id, cg, md, r.hostConntrack, pid, r)
+	if err != nil {
+		klog.Warningf("failed to create container pid=%d cg=%s id=%s: %s", pid, cg.Id, id, err)
+		return nil
+	}
+
+	//klog.Infoln("detected a new container", "pid", pid, "cg", cg.Id, "id", id)
+	// add ns/workload/podname/pid/ctype
+	//sType := fmt.Sprintf("%d", cg.ContainerType)
+
+	setK8sTag(c, extensionTag, pid)
+	if err := prometheus.WrapRegistererWith(setLabels(string(id),
+		extensionTag[Namespace],
+		extensionTag[PodName],
+		extensionTag[ProcessName],
+		fmt.Sprintf("%d", pid)), r.reg).Register(c); err != nil {
+		klog.Warningln("failed to register container:", err)
+		return nil
+	}
+	r.containersByPid[pid] = c
+	r.containersByCgroupId[cgId] = c
+	r.containersById[id] = c
+	return c
 }
 }
 
 
+
 func (r *Registry) updateTrafficStatsIfNecessary() {
 func (r *Registry) updateTrafficStatsIfNecessary() {
 	r.trafficStatsLock.Lock()
 	r.trafficStatsLock.Lock()
 	defer r.trafficStatsLock.Unlock()
 	defer r.trafficStatsLock.Unlock()
@@ -635,7 +617,7 @@ func calcId(cg *cgroup.Cgroup, md *ContainerMetadata, pid uint32) (ContainerID,
 		namespace := md.env["NOMAD_NAMESPACE"]
 		namespace := md.env["NOMAD_NAMESPACE"]
 		task := md.env["NOMAD_TASK_NAME"]
 		task := md.env["NOMAD_TASK_NAME"]
 		if allocId != "" && group != "" && job != "" && namespace != "" && task != "" {
 		if allocId != "" && group != "" && job != "" && namespace != "" && task != "" {
-			return ContainerID(fmt.Sprintf("/nomad/%s/%s/%s/%s/%s", namespace, job, group, allocId, task))
+			return ContainerID(fmt.Sprintf("/nomad/%s/%s/%s/%s/%s", namespace, job, group, allocId, task)), extensionTag
 		}
 		}
 	}
 	}
 	if md.name == "" { // should be "pure" dockerd container here
 	if md.name == "" { // should be "pure" dockerd container here

+ 4 - 16
main.go

@@ -8,6 +8,7 @@ import (
 	"net/http"
 	"net/http"
 	_ "net/http/pprof"
 	_ "net/http/pprof"
 	"os"
 	"os"
+	"path"
 	"runtime"
 	"runtime"
 	"strings"
 	"strings"
 
 
@@ -19,8 +20,6 @@ import (
 	"github.com/coroot/coroot-node-agent/flags"
 	"github.com/coroot/coroot-node-agent/flags"
 	"github.com/coroot/coroot-node-agent/logs"
 	"github.com/coroot/coroot-node-agent/logs"
 	"github.com/coroot/coroot-node-agent/node"
 	"github.com/coroot/coroot-node-agent/node"
-	"github.com/coroot/coroot-node-agent/proc"
-	"github.com/coroot/coroot-node-agent/profiling"
 	"github.com/coroot/coroot-node-agent/prom"
 	"github.com/coroot/coroot-node-agent/prom"
 	"github.com/coroot/coroot-node-agent/tracing"
 	"github.com/coroot/coroot-node-agent/tracing"
 	"github.com/prometheus/client_golang/prometheus"
 	"github.com/prometheus/client_golang/prometheus"
@@ -74,8 +73,8 @@ func uname() (string, string, error) {
 }
 }
 
 
 func machineID() string {
 func machineID() string {
-	for _, p := range []string{"/etc/machine-id", "/var/lib/dbus/machine-id", "/sys/devices/virtual/dmi/id/product_uuid"} {
-		payload, err := os.ReadFile(proc.HostPath(p))
+	for _, p := range []string{"sys/devices/virtual/dmi/id/product_uuid", "etc/machine-id", "var/lib/dbus/machine-id"} {
+		payload, err := os.ReadFile(path.Join("/proc/1/root", p))
 		if err != nil {
 		if err != nil {
 			log.Warningln("failed to read machine-id:", err)
 			log.Warningln("failed to read machine-id:", err)
 			continue
 			continue
@@ -87,15 +86,6 @@ func machineID() string {
 	return ""
 	return ""
 }
 }
 
 
-func systemUUID() string {
-	payload, err := os.ReadFile(proc.HostPath("/sys/devices/virtual/dmi/id/product_uuid"))
-	if err != nil {
-		klog.Warningln("failed to read system-uuid:", err)
-		return ""
-	}
-	return strings.TrimSpace(string(payload))
-}
-
 func whitelistNodeExternalNetworks() {
 func whitelistNodeExternalNetworks() {
 	netdevs, err := node.NetDevices()
 	netdevs, err := node.NetDevices()
 	if err != nil {
 	if err != nil {
@@ -160,13 +150,11 @@ func main() {
 	whitelistNodeExternalNetworks()
 	whitelistNodeExternalNetworks()
 
 
 	machineId := machineID()
 	machineId := machineID()
-	systemUuid := systemUUID()
-
 	tracing.Init(machineId, hostname, version)
 	tracing.Init(machineId, hostname, version)
 	logs.Init(machineId, hostname, version)
 	logs.Init(machineId, hostname, version)
 
 
 	registry := prometheus.NewRegistry()
 	registry := prometheus.NewRegistry()
-	registerer := prometheus.WrapRegistererWith(prometheus.Labels{"machine_id": machineId, "system_uuid": systemUuid}, registry)
+	registerer := prometheus.WrapRegistererWith(prometheus.Labels{"machine_id": machineId}, registry)
 
 
 	registerer.MustRegister(info("node_agent_info", version))
 	registerer.MustRegister(info("node_agent_info", version))