|
|
@@ -1,8 +1,11 @@
|
|
|
package containers
|
|
|
|
|
|
import (
|
|
|
+ "fmt"
|
|
|
+ "github.com/coroot/coroot-node-agent/flags"
|
|
|
log "github.com/sirupsen/logrus"
|
|
|
"os"
|
|
|
+ "time"
|
|
|
)
|
|
|
|
|
|
func (r *Registry) DoFusing() error {
|
|
|
@@ -11,15 +14,37 @@ func (r *Registry) DoFusing() error {
|
|
|
lastErr error
|
|
|
err error
|
|
|
)
|
|
|
+
|
|
|
//先处于熔断状态 (应用层的uprobes\stackUprobes将进行Detach操作)
|
|
|
r.isFusing = true
|
|
|
- //再停止数据发送
|
|
|
- if err = os.Setenv("SEND", ""); err != nil {
|
|
|
- lastErr = err
|
|
|
+ for tryLeft := *flags.FuseTryMax; tryLeft > 0; tryLeft-- {
|
|
|
+ lastErr = nil
|
|
|
+ //再停止数据发送
|
|
|
+ if err = os.Setenv("SEND", ""); err != nil {
|
|
|
+ lastErr = fmt.Errorf("failed to set SEND env: %w", err)
|
|
|
+ log.Warnf("DoFusing,FuseTryMax[%d],tryLeft[%d] set SEND env occurs error:%s", *flags.FuseTryMax, tryLeft-1, err.Error())
|
|
|
+ } else {
|
|
|
+ log.Infof("DoFusing,FuseTryMax[%d],tryLeft[%d] set SEND env OK", *flags.FuseTryMax, tryLeft-1)
|
|
|
+ }
|
|
|
+ //最后关闭内核层的tracepoint、kprobe
|
|
|
+ if err = r.tracer.UnlinkEbpfProg(); err != nil {
|
|
|
+ lastErr = fmt.Errorf("failed to unlink eBPF program: %w", err)
|
|
|
+ log.Warnf("DoFusing,FuseTryMax[%d],tryLeft[%d] UnlinkEbpfProg occurs error:%s", *flags.FuseTryMax, tryLeft-1, err.Error())
|
|
|
+ } else {
|
|
|
+ log.Infof("DoFusing,FuseTryMax[%d],tryLeft[%d] UnlinkEbpfProg OK", *flags.FuseTryMax, tryLeft-1)
|
|
|
+ }
|
|
|
+
|
|
|
+ if lastErr == nil {
|
|
|
+ break
|
|
|
+ }
|
|
|
+
|
|
|
+ time.Sleep(time.Duration(100) * time.Millisecond)
|
|
|
}
|
|
|
- //最后关闭内核层的tracepoint、kprobe
|
|
|
- if err = r.tracer.UnlinkEbpfProg(); err != nil {
|
|
|
- lastErr = err
|
|
|
+
|
|
|
+ if lastErr != nil {
|
|
|
+ //重试N次后,仍然失败 设置熔断机制异常标识 (后续就不进行恢复操作了)
|
|
|
+ r.IsFuseException = true
|
|
|
+ log.Warnf("DoFusing,lastErr is not nil, set IsFuseException = true. error: %s", lastErr.Error())
|
|
|
}
|
|
|
return lastErr
|
|
|
}
|
|
|
@@ -30,15 +55,38 @@ func (r *Registry) DoResume() error {
|
|
|
lastErr error
|
|
|
err error
|
|
|
)
|
|
|
- //先开启内核层的tracepoint、kprobe
|
|
|
- if err = r.tracer.LinkEbpfProg(); err != nil {
|
|
|
- lastErr = err
|
|
|
+
|
|
|
+ for tryLeft := *flags.FuseTryMax; tryLeft > 0; tryLeft-- {
|
|
|
+ //先开启内核层的tracepoint、kprobe
|
|
|
+ if err = r.tracer.LinkEbpfProg(); err != nil {
|
|
|
+ lastErr = fmt.Errorf("failed to link eBPF program: %w", err)
|
|
|
+ log.Warnf("DoResume,FuseTryMax[%d],tryLeft[%d] LinkEbpfProg occurs error:%s", *flags.FuseTryMax, tryLeft-1, err.Error())
|
|
|
+ } else {
|
|
|
+ log.Infof("DoResume,FuseTryMax[%d],tryLeft[%d] LinkEbpfProg OK", *flags.FuseTryMax, tryLeft-1)
|
|
|
+ }
|
|
|
+ //再开启数据发送
|
|
|
+ if err = os.Setenv("SEND", "1"); err != nil {
|
|
|
+ lastErr = fmt.Errorf("failed to set SEND env: %w", err)
|
|
|
+ log.Warnf("DoResume,FuseTryMax[%d],tryLeft[%d] set SEND env occurs error:%s", *flags.FuseTryMax, tryLeft-1, err.Error())
|
|
|
+ } else {
|
|
|
+ log.Infof("DoResume,FuseTryMax[%d],tryLeft[%d] set SEND env OK", *flags.FuseTryMax, tryLeft-1)
|
|
|
+ }
|
|
|
+
|
|
|
+ if lastErr == nil {
|
|
|
+ break
|
|
|
+ }
|
|
|
+
|
|
|
+ time.Sleep(time.Duration(100) * time.Millisecond)
|
|
|
}
|
|
|
- //再开启数据发送
|
|
|
- if err = os.Setenv("SEND", "1"); err != nil {
|
|
|
- lastErr = err
|
|
|
+
|
|
|
+ if lastErr != nil {
|
|
|
+ //重试N次后,仍然失败 将状态设置为熔断,设置熔断机制异常标识 (后续就不进行熔断操作了)
|
|
|
+ r.IsFuseException = true
|
|
|
+ r.isFusing = true
|
|
|
+ log.Warnf("DoResume,lastErr is not nil, set IsFuseException = true. error: %s", lastErr.Error())
|
|
|
+ } else {
|
|
|
+ //最后处于非熔断状态 (应用层的uprobes\stackUprobes将进行Attach操作)
|
|
|
+ r.isFusing = false
|
|
|
}
|
|
|
- //最后处于非熔断状态 (应用层的uprobes\stackUprobes将进行Attach操作)
|
|
|
- r.isFusing = false
|
|
|
return lastErr
|
|
|
}
|