Kaynağa Gözat

Fixed #JiraBug28729: abnormal trace IDs caused by state-control problems in some uprobes during the fuse (circuit-breaker) recovery phase

Carl 1 yıl önce
ebeveyn
işleme
cd12a77bf4

+ 8 - 2
containers/apm_fusing.go

@@ -14,7 +14,10 @@ func (r *Registry) DoFusing() error {
 		lastErr error
 		err     error
 	)
-
+	if r.isFusing {
+		log.Infof("[fusing] Euspace is already in a state of fuse.")
+		return nil
+	}
 	//先处于熔断状态 (应用层的uprobes\stackUprobes将进行Detach操作)
 	r.isFusing = true
 	for tryLeft := *flags.FuseTryMax; tryLeft > 0; tryLeft-- {
@@ -55,7 +58,10 @@ func (r *Registry) DoResume() error {
 		lastErr error
 		err     error
 	)
-
+	if !r.isFusing {
+		log.Infof("[resume] Euspace is already in a state of resume.")
+		return nil
+	}
 	for tryLeft := *flags.FuseTryMax; tryLeft > 0; tryLeft-- {
 		//先开启内核层的tracepoint、kprobe
 		if err = r.tracer.LinkEbpfProg(); err != nil {

+ 8 - 0
containers/apm_register_host.go

@@ -40,6 +40,14 @@ func (r *Registry) TaskRegisterHost() {
 	for {
 		select {
 		case <-gcTicker.C:
+			//_, err1 := os.Stat("/tmp/fuse")
+			//if err1 == nil {
+			//	//r.isFusing = true
+			//	r.DoFusing()
+			//} else {
+			//	r.DoResume()
+			//	//r.isFusing = false
+			//}
 			err := r.RegisterHost()
 			if err != nil {
 				klog.WithError(err).Error("[task] Failed to register host")

+ 1 - 1
containers/container_apm.go

@@ -696,6 +696,7 @@ func (c *Container) DetachUprobes(pid uint32, detachType APP_TYPE) error {
 		p.uprobes = []link.Link{}
 		switch detachType {
 		case APP_UNINSTALL:
+		case APP_FUSE:
 			codeType := c.GetCodeTypeFromCache(pid)
 			switch codeType {
 			case CodeTypeJava:
@@ -704,7 +705,6 @@ func (c *Container) DetachUprobes(pid uint32, detachType APP_TYPE) error {
 				p.goTlsUprobesChecked = false
 				p.openSslUprobesChecked = false
 			default:
-
 			}
 		case APP_UPROBE_ERROR:
 			klog.Infof("[DetachUprobes] ERROR_DETACH for pid %d", pid)