main.go 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178
  1. package main
  2. import (
  3. "bytes"
  4. "net/http"
  5. _ "net/http/pprof"
  6. "os"
  7. "path"
  8. "runtime"
  9. "strings"
  10. "github.com/coroot/coroot-node-agent/common"
  11. "github.com/coroot/coroot-node-agent/containers"
  12. "github.com/coroot/coroot-node-agent/flags"
  13. "github.com/coroot/coroot-node-agent/logs"
  14. "github.com/coroot/coroot-node-agent/node"
  15. "github.com/coroot/coroot-node-agent/profiling"
  16. "github.com/coroot/coroot-node-agent/prom"
  17. "github.com/coroot/coroot-node-agent/tracing"
  18. "github.com/prometheus/client_golang/prometheus"
  19. "github.com/prometheus/client_golang/prometheus/promhttp"
  20. "golang.org/x/mod/semver"
  21. "golang.org/x/sys/unix"
  22. "golang.org/x/time/rate"
  23. "k8s.io/klog/v2"
  24. )
  25. var (
  26. version = "unknown"
  27. )
  28. const minSupportedKernelVersion = "4.16"
  29. func uname() (string, string, error) {
  30. runtime.LockOSThread()
  31. defer runtime.UnlockOSThread()
  32. f, err := os.Open("/proc/1/ns/uts")
  33. if err != nil {
  34. return "", "", err
  35. }
  36. defer f.Close()
  37. self, err := os.Open("/proc/self/ns/uts")
  38. if err != nil {
  39. return "", "", err
  40. }
  41. defer self.Close()
  42. defer func() {
  43. unix.Setns(int(self.Fd()), unix.CLONE_NEWUTS)
  44. }()
  45. err = unix.Setns(int(f.Fd()), unix.CLONE_NEWUTS)
  46. if err != nil {
  47. return "", "", err
  48. }
  49. var utsname unix.Utsname
  50. if err := unix.Uname(&utsname); err != nil {
  51. return "", "", err
  52. }
  53. hostname := string(bytes.Split(utsname.Nodename[:], []byte{0})[0])
  54. kernelVersion := string(bytes.Split(utsname.Release[:], []byte{0})[0])
  55. return hostname, kernelVersion, nil
  56. }
  57. func machineID() string {
  58. for _, p := range []string{"sys/devices/virtual/dmi/id/product_uuid", "etc/machine-id", "var/lib/dbus/machine-id"} {
  59. payload, err := os.ReadFile(path.Join("/proc/1/root", p))
  60. if err != nil {
  61. klog.Warningln("failed to read machine-id:", err)
  62. continue
  63. }
  64. id := strings.TrimSpace(strings.Replace(string(payload), "-", "", -1))
  65. klog.Infoln("machine-id: ", id)
  66. return id
  67. }
  68. return ""
  69. }
  70. func whitelistNodeExternalNetworks() {
  71. netdevs, err := node.NetDevices()
  72. if err != nil {
  73. klog.Warningln("failed to get network interfaces:", err)
  74. return
  75. }
  76. for _, iface := range netdevs {
  77. for _, p := range iface.IPPrefixes {
  78. if p.IP().IsLoopback() || common.IsIpPrivate(p.IP()) {
  79. continue
  80. }
  81. // if the node has an external network IP, whitelist that network
  82. common.ConnectionFilter.WhitelistPrefix(p)
  83. }
  84. }
  85. }
  86. func main() {
  87. klog.LogToStderr(false)
  88. klog.SetOutput(&RateLimitedLogOutput{limiter: rate.NewLimiter(rate.Limit(*flags.LogPerSecond), *flags.LogBurst)})
  89. klog.Infoln("agent version:", version)
  90. hostname, kv, err := uname()
  91. if err != nil {
  92. klog.Exitln("failed to get uname:", err)
  93. }
  94. klog.Infoln("hostname:", hostname)
  95. klog.Infoln("kernel version:", kv)
  96. ver := common.KernelMajorMinor(kv)
  97. if ver == "" {
  98. klog.Exitln("invalid kernel version:", kv)
  99. }
  100. if semver.Compare("v"+ver, "v"+minSupportedKernelVersion) == -1 {
  101. klog.Exitf("the minimum Linux kernel version required is %s or later", minSupportedKernelVersion)
  102. }
  103. whitelistNodeExternalNetworks()
  104. machineId := machineID()
  105. tracing.Init(machineId, hostname, version)
  106. logs.Init(machineId, hostname, version)
  107. registry := prometheus.NewRegistry()
  108. registerer := prometheus.WrapRegistererWith(prometheus.Labels{"machine_id": machineId}, registry)
  109. registerer.MustRegister(info("node_agent_info", version))
  110. if err := registerer.Register(node.NewCollector(hostname, kv)); err != nil {
  111. klog.Exitln(err)
  112. }
  113. processInfoCh := profiling.Init(machineId, hostname)
  114. cr, err := containers.NewRegistry(registerer, kv, processInfoCh)
  115. if err != nil {
  116. klog.Exitln(err)
  117. }
  118. defer cr.Close()
  119. profiling.Start()
  120. defer profiling.Stop()
  121. if err := prom.StartAgent(machineId); err != nil {
  122. klog.Exitln(err)
  123. }
  124. http.Handle("/metrics", promhttp.HandlerFor(registry, promhttp.HandlerOpts{ErrorLog: logger{}, Registry: registerer}))
  125. klog.Infoln("listening on:", *flags.ListenAddress)
  126. klog.Errorln(http.ListenAndServe(*flags.ListenAddress, nil))
  127. }
  128. func info(name, version string) prometheus.Collector {
  129. g := prometheus.NewGauge(prometheus.GaugeOpts{
  130. Name: name,
  131. ConstLabels: prometheus.Labels{"version": version},
  132. })
  133. g.Set(1)
  134. return g
  135. }
  136. type logger struct{}
  137. func (l logger) Println(v ...interface{}) {
  138. klog.Errorln(v...)
  139. }
  140. type RateLimitedLogOutput struct {
  141. limiter *rate.Limiter
  142. }
  143. func (o *RateLimitedLogOutput) Write(data []byte) (int, error) {
  144. if !o.limiter.Allow() {
  145. return len(data), nil
  146. }
  147. return os.Stderr.Write(data)
  148. }