metrics.go 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171
  1. package containers
  2. import (
  3. "github.com/coroot/coroot-node-agent/ebpftracer/l7"
  4. "github.com/prometheus/client_golang/prometheus"
  5. )
  6. var metrics = struct {
  7. ContainerInfo *prometheus.Desc
  8. Restarts *prometheus.Desc
  9. CPULimit *prometheus.Desc
  10. CPUUsage *prometheus.Desc
  11. CPUDelay *prometheus.Desc
  12. ThrottledTime *prometheus.Desc
  13. MemoryLimit *prometheus.Desc
  14. MemoryRss *prometheus.Desc
  15. MemoryCache *prometheus.Desc
  16. OOMKills *prometheus.Desc
  17. DiskDelay *prometheus.Desc
  18. DiskSize *prometheus.Desc
  19. DiskUsed *prometheus.Desc
  20. DiskReserved *prometheus.Desc
  21. DiskReadOps *prometheus.Desc
  22. DiskReadBytes *prometheus.Desc
  23. DiskWriteOps *prometheus.Desc
  24. DiskWriteBytes *prometheus.Desc
  25. NetListenInfo *prometheus.Desc
  26. NetConnectionsSuccessful *prometheus.Desc
  27. NetConnectionsTotalTime *prometheus.Desc
  28. NetConnectionsFailed *prometheus.Desc
  29. NetConnectionsActive *prometheus.Desc
  30. NetRetransmits *prometheus.Desc
  31. NetLatency *prometheus.Desc
  32. NetBytesSent *prometheus.Desc
  33. NetBytesReceived *prometheus.Desc
  34. NetBytesSentPer *prometheus.Desc
  35. NetBytesReceivedPer *prometheus.Desc
  36. NetDataLatency *prometheus.Desc
  37. NetDataDuration *prometheus.Desc
  38. NetEstTime *prometheus.Desc
  39. NetAcceptsSuccessful *prometheus.Desc
  40. NetAcceptsActive *prometheus.Desc
  41. NetAcceptBytesSent *prometheus.Desc
  42. NetAcceptBytesReceived *prometheus.Desc
  43. LogMessages *prometheus.Desc
  44. ApplicationType *prometheus.Desc
  45. JvmInfo *prometheus.Desc
  46. JvmHeapSize *prometheus.Desc
  47. JvmHeapUsed *prometheus.Desc
  48. JvmGCTime *prometheus.Desc
  49. JvmSafepointTime *prometheus.Desc
  50. JvmSafepointSyncTime *prometheus.Desc
  51. PythonThreadLockWaitTime *prometheus.Desc
  52. Ip2Fqdn *prometheus.Desc
  53. }{
  54. ContainerInfo: metric("process_info", "Meta information about the process", "image", "systemd_triggered_by"),
  55. Restarts: metric("process_restarts_total", "Number of times the process was restarted"),
  56. CPULimit: metric("process_resources_cpu_limit_cores", "CPU limit of the process"),
  57. CPUUsage: metric("process_resources_cpu_usage_seconds_total", "Total CPU time consumed by the process"),
  58. CPUDelay: metric("process_resources_cpu_delay_seconds_total", "Total time duration processes of the process have been waiting for a CPU (while being runnable)"),
  59. ThrottledTime: metric("process_resources_cpu_throttled_seconds_total", "Total time duration the process has been throttled"),
  60. MemoryLimit: metric("process_resources_memory_limit_bytes", "Memory limit of the process"),
  61. MemoryRss: metric("process_resources_memory_rss_bytes", "Amount of physical memory used by the process (doesn't include page cache)"),
  62. MemoryCache: metric("process_resources_memory_cache_bytes", "Amount of page cache memory allocated by the process"),
  63. OOMKills: metric("process_oom_kills_total", "Total number of times the process was terminated by the OOM killer"),
  64. DiskDelay: metric("process_resources_disk_delay_seconds_total", "Total time duration processes of the process have been waiting fot I/Os to complete"),
  65. DiskSize: metric("process_resources_disk_size_bytes", "Total capacity of the volume", "mount_point", "device", "volume"),
  66. DiskUsed: metric("process_resources_disk_used_bytes", "Used capacity of the volume", "mount_point", "device", "volume"),
  67. DiskReserved: metric("process_resources_disk_reserved_bytes", "Reserved capacity of the volume", "mount_point", "device", "volume"),
  68. DiskReadOps: metric("process_resources_disk_reads_total", "Total number of reads completed successfully by the process", "mount_point", "device", "volume"),
  69. DiskReadBytes: metric("process_resources_disk_read_bytes_total", "Total number of bytes read from the disk by the process", "mount_point", "device", "volume"),
  70. DiskWriteOps: metric("process_resources_disk_writes_total", "Total number of writes completed successfully by the process", "mount_point", "device", "volume"),
  71. DiskWriteBytes: metric("process_resources_disk_written_bytes_total", "Total number of bytes written to the disk by the process", "mount_point", "device", "volume"),
  72. NetListenInfo: metric("process_net_tcp_listen_info", "Listen address of the process", "listen_addr", "proxy"),
  73. NetConnectionsSuccessful: metric("process_net_tcp_successful_connects_total", "Total number of successful TCP connects", "destination", "actual_destination"),
  74. NetConnectionsTotalTime: metric("process_net_tcp_connection_time_seconds_total", "Time spent on TCP connections", "destination", "actual_destination"),
  75. NetConnectionsFailed: metric("process_net_tcp_failed_connects_total", "Total number of failed TCP connects", "destination"),
  76. NetConnectionsActive: metric("process_net_tcp_active_connections", "Number of active outbound connections used by the process", "destination", "actual_destination"),
  77. NetRetransmits: metric("process_net_tcp_retransmits_total", "Total number of retransmitted TCP segments", "destination", "actual_destination"),
  78. NetLatency: metric("process_net_latency_seconds", "Round-trip time between the process and a remote IP", "destination_ip"),
  79. NetBytesSent: metric("process_net_tcp_bytes_sent_total", "Total number of bytes sent to the peer", "destination", "actual_destination", "src"),
  80. NetBytesReceived: metric("process_net_tcp_bytes_received_total", "Total number of bytes received from the peer", "destination", "actual_destination", "src"),
  81. NetBytesSentPer: metric("process_net_tcp_bytes_sent_per", "Per number of bytes sent to the peer", "destination", "actual_destination", "src"),
  82. NetBytesReceivedPer: metric("process_net_tcp_bytes_received_per", "Per number of bytes received from the peer", "destination", "actual_destination", "src"),
  83. NetAcceptsSuccessful: metric("process_net_tcp_successful_accept_total", "Total number of successful TCP accepts", "destination", "actual_destination"),
  84. NetAcceptBytesSent: metric("process_net_tcp_bytes_sent_accept_total", "Total number of bytes sent to the peer", "destination", "actual_destination"),
  85. NetAcceptBytesReceived: metric("process_net_tcp_bytes_received_accept_total", "Total number of bytes received from the peer", "destination", "actual_destination"),
  86. NetDataLatency: metric("process_net_tcp_data_latency", "Data latency", "destination", "actual_destination", "src"),
  87. NetDataDuration: metric("process_net_tcp_data_duration", "Data duration", "destination", "actual_destination", "src"),
  88. NetEstTime: metric("process_net_tcp_est_time", "Established time", "destination", "actual_destination", "src"),
  89. LogMessages: metric("process_log_messages_total", "Number of messages grouped by the automatically extracted repeated pattern", "source", "level", "pattern_hash", "sample"),
  90. ApplicationType: metric("process_application_type", "Type of the application running in the process (e.g. memcached, postgres, mysql)", "application_type"),
  91. JvmInfo: metric("process_jvm_info", "Meta information about the JVM", "jvm", "java_version"),
  92. JvmHeapSize: metric("process_jvm_heap_size_bytes", "Total heap size in bytes", "jvm"),
  93. JvmHeapUsed: metric("process_jvm_heap_used_bytes", "Used heap size in bytes", "jvm"),
  94. JvmGCTime: metric("process_jvm_gc_time_seconds", "Time spent in the given JVM garbage collector in seconds", "jvm", "gc"),
  95. JvmSafepointTime: metric("process_jvm_safepoint_time_seconds", "Time the application has been stopped for safepoint operations in seconds", "jvm"),
  96. JvmSafepointSyncTime: metric("process_jvm_safepoint_sync_time_seconds", "Time spent getting to safepoints in seconds", "jvm"),
  97. Ip2Fqdn: metric("ip_to_fqdn", "Mapping IP addresses to FQDNs based on DNS requests initiated by processs", "ip", "fqdn"),
  98. PythonThreadLockWaitTime: metric("process_python_thread_lock_wait_time_seconds", "Time spent waiting acquiring GIL in seconds"),
  99. }
  100. var (
  101. L7Requests = map[l7.Protocol]prometheus.CounterOpts{
  102. l7.ProtocolHTTP: {Name: "process_http_requests_total", Help: "Total number of outbound HTTP requests"},
  103. l7.ProtocolPostgres: {Name: "process_postgres_queries_total", Help: "Total number of outbound Postgres queries"},
  104. l7.ProtocolRedis: {Name: "process_redis_queries_total", Help: "Total number of outbound Redis queries"},
  105. l7.ProtocolMemcached: {Name: "process_memcached_queries_total", Help: "Total number of outbound Memcached queries"},
  106. l7.ProtocolMysql: {Name: "process_mysql_queries_total", Help: "Total number of outbound Mysql queries"},
  107. l7.ProtocolMongo: {Name: "process_mongo_queries_total", Help: "Total number of outbound Mongo queries"},
  108. l7.ProtocolKafka: {Name: "process_kafka_requests_total", Help: "Total number of outbound Kafka requests"},
  109. l7.ProtocolCassandra: {Name: "process_cassandra_queries_total", Help: "Total number of outbound Cassandra requests"},
  110. l7.ProtocolRabbitmq: {Name: "process_rabbitmq_messages_total", Help: "Total number of Rabbitmq messages produced or consumed by the process"},
  111. l7.ProtocolNats: {Name: "process_nats_messages_total", Help: "Total number of NATS messages produced or consumed by the process"},
  112. l7.ProtocolDubbo2: {Name: "process_dubbo_requests_total", Help: "Total number of outbound DUBBO requests"},
  113. l7.ProtocolDNS: {Name: "process_dns_requests_total", Help: "Total number of outbound DNS requests"},
  114. }
  115. L7Latency = map[l7.Protocol]prometheus.HistogramOpts{
  116. l7.ProtocolHTTP: {Name: "process_http_requests_duration_seconds_total", Help: "Histogram of the response time for each outbound HTTP request"},
  117. l7.ProtocolPostgres: {Name: "process_postgres_queries_duration_seconds_total", Help: "Histogram of the execution time for each outbound Postgres query"},
  118. l7.ProtocolRedis: {Name: "process_redis_queries_duration_seconds_total", Help: "Histogram of the execution time for each outbound Redis query"},
  119. l7.ProtocolMemcached: {Name: "process_memcached_queries_duration_seconds_total", Help: "Histogram of the execution time for each outbound Memcached query"},
  120. l7.ProtocolMysql: {Name: "process_mysql_queries_duration_seconds_total", Help: "Histogram of the execution time for each outbound Mysql query"},
  121. l7.ProtocolMongo: {Name: "process_mongo_queries_duration_seconds_total", Help: "Histogram of the execution time for each outbound Mongo query"},
  122. l7.ProtocolKafka: {Name: "process_kafka_requests_duration_seconds_total", Help: "Histogram of the execution time for each outbound Kafka request"},
  123. l7.ProtocolCassandra: {Name: "process_cassandra_queries_duration_seconds_total", Help: "Histogram of the execution time for each outbound Cassandra request"},
  124. l7.ProtocolDubbo2: {Name: "process_dubbo_requests_duration_seconds_total", Help: "Histogram of the response time for each outbound DUBBO request"},
  125. l7.ProtocolDNS: {Name: "process_dns_requests_duration_seconds_total", Help: "Histogram of the response time for each outbound DNS request"},
  126. }
  127. )
  128. func metric(name, help string, labels ...string) *prometheus.Desc {
  129. return prometheus.NewDesc(name, help, labels, nil)
  130. }
  131. func newCounter(name, help string, constLabels prometheus.Labels) prometheus.Counter {
  132. return prometheus.NewCounter(prometheus.CounterOpts{Name: name, Help: help, ConstLabels: constLabels})
  133. }
  134. func newCounterVec(name, help string, constLabels prometheus.Labels, labelNames ...string) *prometheus.CounterVec {
  135. return prometheus.NewCounterVec(prometheus.CounterOpts{Name: name, Help: help, ConstLabels: constLabels}, labelNames)
  136. }
  137. func newGauge(name, help string, constLabels prometheus.Labels) prometheus.Gauge {
  138. return prometheus.NewGauge(prometheus.GaugeOpts{Name: name, Help: help, ConstLabels: constLabels})
  139. }
  140. func newGaugeVec(name, help string, constLabels prometheus.Labels, labelNames ...string) *prometheus.GaugeVec {
  141. return prometheus.NewGaugeVec(prometheus.GaugeOpts{Name: name, Help: help, ConstLabels: constLabels}, labelNames)
  142. }