metrics.go 5.5 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697
  1. package containers
  2. import (
  3. "github.com/coroot/coroot-node-agent/ebpftracer"
  4. "github.com/prometheus/client_golang/prometheus"
  5. "reflect"
  6. )
  7. var metrics = struct {
  8. Restarts *prometheus.Desc
  9. CPULimit *prometheus.Desc
  10. CPUUsage *prometheus.Desc
  11. CPUDelay *prometheus.Desc
  12. ThrottledTime *prometheus.Desc
  13. MemoryLimit *prometheus.Desc
  14. MemoryRss *prometheus.Desc
  15. MemoryCache *prometheus.Desc
  16. OOMKills *prometheus.Desc
  17. DiskDelay *prometheus.Desc
  18. DiskSize *prometheus.Desc
  19. DiskUsed *prometheus.Desc
  20. DiskReserved *prometheus.Desc
  21. DiskReadOps *prometheus.Desc
  22. DiskReadBytes *prometheus.Desc
  23. DiskWriteOps *prometheus.Desc
  24. DiskWriteBytes *prometheus.Desc
  25. NetListenInfo *prometheus.Desc
  26. NetConnectsSuccessful *prometheus.Desc
  27. NetConnectsFailed *prometheus.Desc
  28. NetConnectionsActive *prometheus.Desc
  29. NetRetransmits *prometheus.Desc
  30. NetLatency *prometheus.Desc
  31. LogMessages *prometheus.Desc
  32. ApplicationType *prometheus.Desc
  33. }{
  34. Restarts: metric("container_restarts_total", "Number of times the container was restarted"),
  35. CPULimit: metric("container_resources_cpu_limit_cores", "CPU limit of the container"),
  36. CPUUsage: metric("container_resources_cpu_usage_seconds_total", "Total CPU time consumed by the container"),
  37. CPUDelay: metric("container_resources_cpu_delay_seconds_total", "Total time duration processes of the container have been waiting for a CPU (while being runnable)"),
  38. ThrottledTime: metric("container_resources_cpu_throttled_seconds_total", "Total time duration the container has been throttled"),
  39. MemoryLimit: metric("container_resources_memory_limit_bytes", "Memory limit of the container"),
  40. MemoryRss: metric("container_resources_memory_rss_bytes", "Amount of physical memory used by the container (doesn't include page cache)"),
  41. MemoryCache: metric("container_resources_memory_cache_bytes", "Amount of page cache memory allocated by the container"),
  42. OOMKills: metric("container_oom_kills_total", "Total number of times the container was terminated by the OOM killer"),
  43. DiskDelay: metric("container_resources_disk_delay_seconds_total", "Total time duration processes of the container have been waiting fot I/Os to complete"),
  44. DiskSize: metric("container_resources_disk_size_bytes", "Total capacity of the volume", "mount_point", "device", "volume"),
  45. DiskUsed: metric("container_resources_disk_used_bytes", "Used capacity of the volume", "mount_point", "device", "volume"),
  46. DiskReserved: metric("container_resources_disk_reserved_bytes", "Reserved capacity of the volume", "mount_point", "device", "volume"),
  47. DiskReadOps: metric("container_resources_disk_reads_total", "Total number of reads completed successfully by the container", "mount_point", "device", "volume"),
  48. DiskReadBytes: metric("container_resources_disk_read_bytes_total", "Total number of bytes read from the disk by the container", "mount_point", "device", "volume"),
  49. DiskWriteOps: metric("container_resources_disk_writes_total", "Total number of writes completed successfully by the container", "mount_point", "device", "volume"),
  50. DiskWriteBytes: metric("container_resources_disk_written_bytes_total", "Total number of bytes written to the disk by the container", "mount_point", "device", "volume"),
  51. NetListenInfo: metric("container_net_tcp_listen_info", "Listen address of the container", "listen_addr", "proxy"),
  52. NetConnectsSuccessful: metric("container_net_tcp_successful_connects_total", "Total number of successful TCP connects", "destination", "actual_destination"),
  53. NetConnectsFailed: metric("container_net_tcp_failed_connects_total", "Total number of failed TCP connects", "destination"),
  54. NetConnectionsActive: metric("container_net_tcp_active_connections", "Number of active outbound connections used by the container", "destination", "actual_destination"),
  55. NetRetransmits: metric("container_net_tcp_retransmits_total", "Total number of retransmitted TCP segments", "destination", "actual_destination"),
  56. NetLatency: metric("container_net_latency_seconds", "Round-trip time between the container and a remote IP", "destination_ip"),
  57. LogMessages: metric("container_log_messages_total", "Number of messages grouped by the automatically extracted repeated pattern", "source", "level", "pattern_hash", "sample"),
  58. ApplicationType: metric("container_application_type", "Type of the application running in the container (e.g. memcached, postgres, mysql)", "application_type"),
  59. }
  60. var (
  61. L7Requests = map[ebpftracer.L7Protocol]prometheus.CounterOpts{
  62. ebpftracer.L7ProtocolHTTP: {Name: "container_http_requests_total", Help: "Total number of outbound HTTP requests"},
  63. ebpftracer.L7ProtocolPostgres: {Name: "container_postgres_queries_total", Help: "Total number of outbound Postgres queries"},
  64. }
  65. L7Latency = map[ebpftracer.L7Protocol]prometheus.HistogramOpts{
  66. ebpftracer.L7ProtocolHTTP: {Name: "container_http_request_duration_seconds_total", Help: "Histogram of the response time for each outbound HTTP request"},
  67. ebpftracer.L7ProtocolPostgres: {Name: "container_postgres_queries_duration_seconds_total", Help: "Histogram of the execution time for each outbound Postgres query"},
  68. }
  69. )
  70. func metric(name, help string, labels ...string) *prometheus.Desc {
  71. return prometheus.NewDesc(name, help, labels, nil)
  72. }
  73. var metricsList []*prometheus.Desc
  74. func init() {
  75. v := reflect.ValueOf(metrics)
  76. for i := 0; i < v.NumField(); i++ {
  77. metricsList = append(metricsList, v.Field(i).Interface().(*prometheus.Desc))
  78. }
  79. }