apm_fusing.go 3.2 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798
  1. package containers
  2. import (
  3. "fmt"
  4. "github.com/coroot/coroot-node-agent/flags"
  5. log "github.com/sirupsen/logrus"
  6. "os"
  7. "time"
  8. )
  9. func (r *Registry) DoFusing() error {
  10. log.Infof("-----DoFusing will to execute fuse operates -----\n")
  11. var (
  12. lastErr error
  13. err error
  14. )
  15. if r.isFusing {
  16. log.Infof("[fusing] Euspace is already in a state of fuse.")
  17. return nil
  18. }
  19. //先处于熔断状态 (应用层的uprobes\stackUprobes将进行Detach操作)
  20. r.isFusing = true
  21. for tryLeft := *flags.FuseTryMax; tryLeft > 0; tryLeft-- {
  22. lastErr = nil
  23. //再停止数据发送
  24. if err = os.Setenv("SEND", ""); err != nil {
  25. lastErr = fmt.Errorf("failed to set SEND env: %w", err)
  26. log.Warnf("DoFusing,FuseTryMax[%d],tryLeft[%d] set SEND env occurs error:%s", *flags.FuseTryMax, tryLeft-1, err.Error())
  27. } else {
  28. log.Infof("DoFusing,FuseTryMax[%d],tryLeft[%d] set SEND env OK", *flags.FuseTryMax, tryLeft-1)
  29. }
  30. //最后关闭内核层的tracepoint、kprobe
  31. if err = r.tracer.UnlinkEbpfProg(); err != nil {
  32. lastErr = fmt.Errorf("failed to unlink eBPF program: %w", err)
  33. log.Warnf("DoFusing,FuseTryMax[%d],tryLeft[%d] UnlinkEbpfProg occurs error:%s", *flags.FuseTryMax, tryLeft-1, err.Error())
  34. } else {
  35. log.Infof("DoFusing,FuseTryMax[%d],tryLeft[%d] UnlinkEbpfProg OK", *flags.FuseTryMax, tryLeft-1)
  36. }
  37. if lastErr == nil {
  38. break
  39. }
  40. time.Sleep(time.Duration(100) * time.Millisecond)
  41. }
  42. if lastErr != nil {
  43. //重试N次后,仍然失败 设置熔断机制异常标识 (后续就不进行恢复操作了)
  44. r.IsFuseException = true
  45. log.Warnf("DoFusing,lastErr is not nil, set IsFuseException = true. error: %s", lastErr.Error())
  46. }
  47. return lastErr
  48. }
  49. func (r *Registry) DoResume() error {
  50. log.Infof("-----DoResume will to execute resume operates -----\n")
  51. var (
  52. lastErr error
  53. err error
  54. )
  55. if !r.isFusing {
  56. log.Infof("[resume] Euspace is already in a state of resume.")
  57. return nil
  58. }
  59. for tryLeft := *flags.FuseTryMax; tryLeft > 0; tryLeft-- {
  60. //先开启内核层的tracepoint、kprobe
  61. if err = r.tracer.LinkEbpfProg(); err != nil {
  62. lastErr = fmt.Errorf("failed to link eBPF program: %w", err)
  63. log.Warnf("DoResume,FuseTryMax[%d],tryLeft[%d] LinkEbpfProg occurs error:%s", *flags.FuseTryMax, tryLeft-1, err.Error())
  64. } else {
  65. log.Infof("DoResume,FuseTryMax[%d],tryLeft[%d] LinkEbpfProg OK", *flags.FuseTryMax, tryLeft-1)
  66. }
  67. //再开启数据发送
  68. if err = os.Setenv("SEND", "1"); err != nil {
  69. lastErr = fmt.Errorf("failed to set SEND env: %w", err)
  70. log.Warnf("DoResume,FuseTryMax[%d],tryLeft[%d] set SEND env occurs error:%s", *flags.FuseTryMax, tryLeft-1, err.Error())
  71. } else {
  72. log.Infof("DoResume,FuseTryMax[%d],tryLeft[%d] set SEND env OK", *flags.FuseTryMax, tryLeft-1)
  73. }
  74. if lastErr == nil {
  75. break
  76. }
  77. time.Sleep(time.Duration(100) * time.Millisecond)
  78. }
  79. if lastErr != nil {
  80. //重试N次后,仍然失败 将状态设置为熔断,设置熔断机制异常标识 (后续就不进行熔断操作了)
  81. r.IsFuseException = true
  82. r.isFusing = true
  83. log.Warnf("DoResume,lastErr is not nil, set IsFuseException = true. error: %s", lastErr.Error())
  84. } else {
  85. //最后处于非熔断状态 (应用层的uprobes\stackUprobes将进行Attach操作)
  86. r.isFusing = false
  87. }
  88. return lastErr
  89. }