Răsfoiți Sursa

Merge branch 'main' into dev-trace2

Carl 2 ani în urmă
părinte
comite
7a9b3dcbf6

+ 3 - 8
common/volumes.go

@@ -5,18 +5,13 @@ import (
 )
 
 var (
-	k8sVolumeDir = regexp.MustCompile(`.+/(volumes/kubernetes.io~([^/]+)|volume-subpaths|k3s/storage)/(pvc-[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12})`)
+	k8sVolumeDir = regexp.MustCompile(`.+(pvc-[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}).*`)
 )
 
 func ParseKubernetesVolumeSource(source string) string {
 	groups := k8sVolumeDir.FindStringSubmatch(source)
-	if len(groups) != 4 {
+	if len(groups) != 2 {
 		return ""
 	}
-	provisioner, volume := groups[2], groups[3]
-	switch provisioner {
-	case "secret", "configmap", "empty-dir", "projected":
-		return ""
-	}
-	return volume
+	return groups[1]
 }

+ 4 - 0
common/volumes_test.go

@@ -19,6 +19,10 @@ func TestParseKubernetesVolumeSource(t *testing.T) {
 		"pvc-d0c0cc92-ef36-4b4f-90c0-5c5ed489df0b",
 		ParseKubernetesVolumeSource("/var/lib/rancher/k3s/storage/pvc-d0c0cc92-ef36-4b4f-90c0-5c5ed489df0b_default_mongod-data-mongo-psmdb-db-rs0-0"))
 
+	assert.Equal(t,
+		"pvc-4bf620ab-bb10-4cd6-803a-5be8735ccaf6",
+		ParseKubernetesVolumeSource("/var/snap/microk8s/common/default-storage/coroot-coroot-data-pvc-4bf620ab-bb10-4cd6-803a-5be8735ccaf6"))
+
 	assert.Equal(t,
 		"",
 		ParseKubernetesVolumeSource("/var/lib/kubelet/pods/adf669ca-c3f8-49de-9ad4-9dd66721dc0d/volumes/kubernetes.io~projected/kube-api-access-jvvq6"),

+ 52 - 5
containers/container.go

@@ -126,7 +126,8 @@ type Container struct {
 	connectionsByPidFd map[PidFd]*ActiveConnection
 	retransmits        map[AddrPair]int64 // dst:actual_dst -> count
 
-	l7Stats L7Stats
+	l7Stats  L7Stats
+	dnsStats *L7Metrics
 
 	oomKills int
 
@@ -170,6 +171,7 @@ func NewContainer(id ContainerID, cg *cgroup.Cgroup, md *ContainerMetadata, host
 		connectionsByPidFd: map[PidFd]*ActiveConnection{},
 		retransmits:        map[AddrPair]int64{},
 		l7Stats:            L7Stats{},
+		dnsStats:           &L7Metrics{},
 
 		mounts: map[string]proc.MountInfo{},
 
@@ -354,7 +356,12 @@ func (c *Container) Collect(ch chan<- prometheus.Metric) {
 	for appType := range appTypes {
 		ch <- gauge(metrics.ApplicationType, 1, appType)
 	}
-
+	if c.dnsStats.Requests != nil {
+		c.dnsStats.Requests.Collect(ch)
+	}
+	if c.dnsStats.Latency != nil {
+		c.dnsStats.Latency.Collect(ch)
+	}
 	c.l7Stats.collect(ch)
 
 	if !*flags.DisablePinger {
@@ -575,16 +582,55 @@ func (c *Container) onConnectionClose(srcDst AddrPair) bool {
 	return true
 }
 
-func (c *Container) onL7Request(pid uint32, fd uint64, timestamp uint64, r *l7.RequestData) {
+func (c *Container) onDNSRequest(r *l7.RequestData) map[netaddr.IP]string {
+	status := r.Status.DNS()
+	if status == "" {
+		return nil
+	}
+	t, fqdn, ips := l7.ParseDns(r.Payload)
+	if t == "" {
+		return nil
+	}
+	if c.dnsStats.Requests == nil {
+		dnsReq := L7Requests[l7.ProtocolDNS]
+		c.dnsStats.Requests = prometheus.NewCounterVec(
+			prometheus.CounterOpts{Name: dnsReq.Name, Help: dnsReq.Help},
+			[]string{"request_type", "status"},
+		)
+	}
+	if m, _ := c.dnsStats.Requests.GetMetricWithLabelValues(t, status); m != nil {
+		m.Inc()
+	}
+	if r.Duration != 0 {
+		if c.dnsStats.Latency == nil {
+			dnsLatency := L7Latency[l7.ProtocolDNS]
+			c.dnsStats.Latency = prometheus.NewHistogram(prometheus.HistogramOpts{Name: dnsLatency.Name, Help: dnsLatency.Help})
+		}
+		c.dnsStats.Latency.Observe(r.Duration.Seconds())
+	}
+	ip2fqdn := map[netaddr.IP]string{}
+	if fqdn != "" {
+		for _, ip := range ips {
+			ip2fqdn[ip] = fqdn
+		}
+	}
+	return ip2fqdn
+}
+
+func (c *Container) onL7Request(pid uint32, fd uint64, timestamp uint64, r *l7.RequestData) map[netaddr.IP]string {
 	c.lock.Lock()
 	defer c.lock.Unlock()
 
+	if r.Protocol == l7.ProtocolDNS {
+		return c.onDNSRequest(r)
+	}
+
 	conn := c.connectionsByPidFd[PidFd{Pid: pid, Fd: fd}]
 	if conn == nil {
-		return
+		return nil
 	}
 	if timestamp != 0 && conn.Timestamp != timestamp {
-		return
+		return nil
 	}
 	stats := c.l7Stats.get(r.Protocol, conn.Dest, conn.ActualDest)
 	trace := tracing.NewTrace(string(c.id), conn.ActualDest)
@@ -639,6 +685,7 @@ func (c *Container) onL7Request(pid uint32, fd uint64, timestamp uint64, r *l7.R
 	case l7.ProtocolDubbo2:
 		stats.observe(r.Status.String(), "", r.Duration)
 	}
+	return nil
 }
 
 func (c *Container) onRetransmit(srcDst AddrPair) bool {

+ 10 - 5
containers/container_apm.go

@@ -29,9 +29,13 @@ func (c *Container) InitTrace(traceId uint64, r *l7.RequestData) error {
 	return nil
 }
 
-func (c *Container) onL7RequestApm(pid uint32, fd uint64, timestamp uint64, r *l7.RequestData) {
+func (c *Container) onL7RequestApm(pid uint32, fd uint64, timestamp uint64, r *l7.RequestData) map[netaddr.IP]string {
 	c.lock.Lock()
 	defer c.lock.Unlock()
+	if r.Protocol == l7.ProtocolDNS {
+		return c.onDNSRequest(r)
+	}
+
 	if r.Protocol == l7.ProtocolTrace {
 		//fmt.Println("r.TraceStart:", r.TraceStart)
 		//fmt.Println("r.TraceEnd:", r.TraceEnd)
@@ -51,7 +55,7 @@ func (c *Container) onL7RequestApm(pid uint32, fd uint64, timestamp uint64, r *l
 			//fmt.Println("method:", method)
 			//fmt.Println("path:", path)
 			//fmt.Println("====ProtocolTrace start2====")
-			return
+			return nil
 		}
 		if r.TraceEnd == 1 {
 			//fmt.Println("r:", r)
@@ -63,16 +67,16 @@ func (c *Container) onL7RequestApm(pid uint32, fd uint64, timestamp uint64, r *l
 				delete(c.traceMap, r.TraceId)
 			}
 			//fmt.Println("====ProtocolTrace end1====", ok, r.TraceId)
-			return
+			return nil
 		}
 	}
 	conn := c.connectionsByPidFd[PidFd{Pid: pid, Fd: fd}]
 	//fmt.Println(conn, pid, fd)
 	if conn == nil {
-		return
+		return nil
 	}
 	if timestamp != 0 && conn.Timestamp != timestamp {
-		return
+		return nil
 	}
 	stats := c.l7Stats.get(r.Protocol, conn.Dest, conn.ActualDest)
 	trace := tracing.NewTrace(string(c.id), conn.ActualDest)
@@ -151,4 +155,5 @@ func (c *Container) onL7RequestApm(pid uint32, fd uint64, timestamp uint64, r *l
 	case l7.ProtocolRabbitmq, l7.ProtocolNats:
 		stats.observe(r.Status.String(), r.Method.String(), 0)
 	}
+	return nil
 }

+ 4 - 0
containers/metrics.go

@@ -45,6 +45,7 @@ var metrics = struct {
 	JvmGCTime            *prometheus.Desc
 	JvmSafepointTime     *prometheus.Desc
 	JvmSafepointSyncTime *prometheus.Desc
+	Ip2Fqdn              *prometheus.Desc
 }{
 	ContainerInfo: metric("container_info", "Meta information about the container", "image"),
 
@@ -86,6 +87,7 @@ var metrics = struct {
 	JvmGCTime:            metric("container_jvm_gc_time_seconds", "Time spent in the given JVM garbage collector in seconds", "jvm", "gc"),
 	JvmSafepointTime:     metric("container_jvm_safepoint_time_seconds", "Time the application has been stopped for safepoint operations in seconds", "jvm"),
 	JvmSafepointSyncTime: metric("container_jvm_safepoint_sync_time_seconds", "Time spent getting to safepoints in seconds", "jvm"),
+	Ip2Fqdn:              metric("ip_to_fqdn", "Mapping IP addresses to FQDNs based on DNS requests initiated by containers", "ip", "fqdn"),
 }
 
 var (
@@ -101,6 +103,7 @@ var (
 		l7.ProtocolRabbitmq:  {Name: "container_rabbitmq_messages_total", Help: "Total number of Rabbitmq messages produced or consumed by the container"},
 		l7.ProtocolNats:      {Name: "container_nats_messages_total", Help: "Total number of NATS messages produced or consumed by the container"},
 		l7.ProtocolDubbo2:    {Name: "container_dubbo_requests_total", Help: "Total number of outbound DUBBO requests"},
+		l7.ProtocolDNS:       {Name: "container_dns_requests_total", Help: "Total number of outbound DNS requests"},
 	}
 	L7Latency = map[l7.Protocol]prometheus.HistogramOpts{
 		l7.ProtocolHTTP:      {Name: "container_http_requests_duration_seconds_total", Help: "Histogram of the response time for each outbound HTTP request"},
@@ -112,6 +115,7 @@ var (
 		l7.ProtocolKafka:     {Name: "container_kafka_requests_duration_seconds_total", Help: "Histogram of the execution time for each outbound Kafka request"},
 		l7.ProtocolCassandra: {Name: "container_cassandra_queries_duration_seconds_total", Help: "Histogram of the execution time for each outbound Cassandra request"},
 		l7.ProtocolDubbo2:    {Name: "container_dubbo_requests_duration_seconds_total", Help: "Histogram of the response time for each outbound DUBBO request"},
+		l7.ProtocolDNS:       {Name: "container_dns_requests_duration_seconds_total", Help: "Histogram of the response time for each outbound DNS request"},
 	}
 )
 

+ 37 - 3
containers/registry.go

@@ -6,6 +6,7 @@ import (
 	"os"
 	"regexp"
 	"strings"
+	"sync"
 	"time"
 
 	"github.com/coroot/coroot-node-agent/cgroup"
@@ -15,6 +16,7 @@ import (
 	"github.com/coroot/coroot-node-agent/proc"
 	"github.com/prometheus/client_golang/prometheus"
 	"github.com/vishvananda/netns"
+	"inet.af/netaddr"
 	"k8s.io/klog/v2"
 )
 
@@ -42,6 +44,8 @@ type Registry struct {
 	containersById       map[ContainerID]*Container
 	containersByCgroupId map[string]*Container
 	containersByPid      map[uint32]*Container
+	ip2fqdn              map[netaddr.IP]string
+	ip2fqdnLock          sync.Mutex
 
 	processInfoCh chan<- ProcessInfo
 }
@@ -97,12 +101,15 @@ func NewRegistry(reg prometheus.Registerer, kernelVersion string, processInfoCh
 		containersById:       map[ContainerID]*Container{},
 		containersByCgroupId: map[string]*Container{},
 		containersByPid:      map[uint32]*Container{},
+		ip2fqdn:              map[netaddr.IP]string{},
 
 		processInfoCh: processInfoCh,
 
 		tracer: ebpftracer.NewTracer(kernelVersion, *flags.DisableL7Tracing),
 	}
-
+	if err = reg.Register(r); err != nil {
+		return nil, err
+	}
 	go r.handleEvents(r.events)
 	if err = r.tracer.Run(r.events); err != nil {
 		close(r.events)
@@ -112,6 +119,18 @@ func NewRegistry(reg prometheus.Registerer, kernelVersion string, processInfoCh
 	return r, nil
 }
 
+func (r *Registry) Describe(ch chan<- *prometheus.Desc) {
+	ch <- metrics.Ip2Fqdn
+}
+
+func (r *Registry) Collect(ch chan<- prometheus.Metric) {
+	r.ip2fqdnLock.Lock()
+	defer r.ip2fqdnLock.Unlock()
+	for ip, fqdn := range r.ip2fqdn {
+		ch <- gauge(metrics.Ip2Fqdn, 1, ip.String(), fqdn)
+	}
+}
+
 func (r *Registry) Close() {
 	r.tracer.Close()
 	close(r.events)
@@ -137,11 +156,14 @@ func (r *Registry) handleEvents(ch <-chan ebpftracer.Event) {
 					c.onProcessExit(pid, false)
 				}
 			}
-
+			activeIPs := map[netaddr.IP]struct{}{}
 			for id, c := range r.containersById {
 				if !c.Dead(now) {
 					continue
 				}
+				for dst := range c.connectLastAttempt {
+					activeIPs[dst.IP()] = struct{}{}
+				}
 				klog.Infoln("deleting dead container:", id)
 				for cg, cc := range r.containersByCgroupId {
 					if cc == c {
@@ -163,6 +185,13 @@ func (r *Registry) handleEvents(ch <-chan ebpftracer.Event) {
 				delete(r.containersById, id)
 				c.Close()
 			}
+			r.ip2fqdnLock.Lock()
+			for ip := range r.ip2fqdn {
+				if _, ok := activeIPs[ip]; !ok {
+					delete(r.ip2fqdn, ip)
+				}
+			}
+			r.ip2fqdnLock.Unlock()
 		case e, more := <-ch:
 			if e.Pid == uint32(os.Getpid()) {
 				continue
@@ -249,7 +278,12 @@ func (r *Registry) handleEvents(ch <-chan ebpftracer.Event) {
 					continue
 				}
 				if c := r.containersByPid[e.Pid]; c != nil {
-					c.onL7RequestApm(e.Pid, e.Fd, e.Timestamp, e.L7Request)
+					ip2fqdn := c.onL7RequestApm(e.Pid, e.Fd, e.Timestamp, e.L7Request)
+					r.ip2fqdnLock.Lock()
+					for ip, fqdn := range ip2fqdn {
+						r.ip2fqdn[ip] = fqdn
+					}
+					r.ip2fqdnLock.Unlock()
 				}
 			}
 		}

+ 58 - 0
ebpftracer/ebpf/l7/dns.c

@@ -0,0 +1,58 @@
+#define DNS_QR_RESPONSE 0b10000000
+#define DNS_OPCODE 0b01111000
+#define DNS_Z 0b11110000
+#define DNS_RCODE 0b00001111
+
+struct cw_dns_header {
+    __s16 id;
+    __u8 bits0;
+    __u8 bits1;
+    __s16 qdcount;
+};
+
+static __always_inline
+int is_dns_request(char *buf, __u64 buf_size, __s16 *stream_id) {
+    struct cw_dns_header h = {};
+    if (buf_size < sizeof(h)) {
+        return 0;
+    }
+    bpf_read(buf, h);
+    if (h.bits0 & DNS_QR_RESPONSE) {
+        return 0;
+    }
+    if (h.bits0 & DNS_OPCODE) {
+       return 0;
+    }
+    h.qdcount = bpf_ntohs(h.qdcount);
+
+    if (h.qdcount != 1) {
+        return 0;
+    }
+    *stream_id = h.id;
+    return 1;
+}
+
+static __always_inline
+int is_dns_response(char *buf, __u64 buf_size, __s16 *stream_id, __u32 *status) {
+    struct cw_dns_header h = {};
+    if (buf_size < sizeof(h)) {
+        return 0;
+    }
+    bpf_read(buf, h);
+    if (!(h.bits0 & DNS_QR_RESPONSE)) {
+        return 0;
+    }
+    if (h.bits0 & DNS_OPCODE) {
+       return 0;
+    }
+    if (!(h.bits1 & DNS_Z)) {
+        return 0;
+    }
+    h.qdcount = bpf_ntohs(h.qdcount);
+    if (h.qdcount != 1) {
+        return 0;
+    }
+    *status = h.bits1 & DNS_RCODE;
+    *stream_id = h.id;
+    return 1;
+}

+ 44 - 2
ebpftracer/ebpf/l7/l7.c

@@ -1,3 +1,4 @@
+#define PROTOCOL_TRACE	   200
 #define PROTOCOL_UNKNOWN    0
 #define PROTOCOL_HTTP	    1
 #define PROTOCOL_POSTGRES	2
@@ -11,8 +12,9 @@
 #define PROTOCOL_NATS      10
 #define PROTOCOL_HTTP2	   11
 #define PROTOCOL_DUBBO2    12
+#define PROTOCOL_DNS       13
+
 
-#define PROTOCOL_TRACE	   200
 
 #define STATUS_UNKNOWN  0
 #define STATUS_OK       200
@@ -54,6 +56,7 @@
 #include "nats.c"
 #include "http2.c"
 #include "dubbo2.c"
+#include "dns.c"
 #include "apm_trace.c"
 
 
@@ -175,6 +178,9 @@ struct l7_user_msghdr {
 	int msg_namelen;
 	struct l7_iovec *msg_iov;
 	__u64 msg_iovlen;
+	void *msg_control;
+    __u64 msg_controllen;
+    __u32 msg_flags;
 };
 
 static inline __attribute__((__always_inline__))
@@ -411,6 +417,8 @@ int trace_enter_write(void *ctx, __u64 fd, __u16 is_tls, char *buf, __u64 size,
         return 0;
     } else if (is_dubbo2_request(payload, size)) {
         req->protocol = PROTOCOL_DUBBO2;
+    } else if (is_dns_request(payload, size, &k.stream_id)) {
+        req->protocol = PROTOCOL_DNS;
     }
 
     if (req->protocol == PROTOCOL_UNKNOWN) {
@@ -576,7 +584,18 @@ int trace_exit_read(void *ctx, __u64 id, __u32 pid, __u16 is_tls, long int ret)
     struct l7_request *req = bpf_map_lookup_elem(&active_l7_requests, &k);
     int response = 0;
     if (!req) {
-        if (is_cassandra_response(payload, ret, &k.stream_id, &e->status)) {
+        if (is_dns_response(payload, ret, &k.stream_id, &e->status)) {
+            req = bpf_map_lookup_elem(&active_l7_requests, &k);
+            if (!req) {
+                return 0;
+            }
+            e->protocol = PROTOCOL_DNS;
+            e->duration = bpf_ktime_get_ns() - req->ns;
+            e->payload_size = ret;
+            COPY_PAYLOAD(e->payload, ret, payload);
+            send_event(ctx, e, k.pid, k.fd);
+            return 0;
+        } else if (is_cassandra_response(payload, ret, &k.stream_id, &e->status)) {
             req = bpf_map_lookup_elem(&active_l7_requests, &k);
             if (!req) {
                 return 0;
@@ -684,6 +703,29 @@ int sys_enter_sendmsg(struct trace_event_raw_sys_enter_rw__stub* ctx) {
     return trace_enter_write(ctx, ctx->fd, 0, (char*)msghdr.msg_iov, 0, msghdr.msg_iovlen);
 }
 
+//struct cw_mmsghdr {
+//	struct user_msghdr msg_hdr;
+//	__u32 msg_len;
+//};
+
+SEC("tracepoint/syscalls/sys_enter_sendmmsg")
+int sys_enter_sendmmsg(struct trace_event_raw_sys_enter_rw__stub* ctx) {
+    __u64 offset = 0;
+    #pragma unroll
+    for (int i = 0; i <= 1; i++) {
+        if (i >= ctx->size) {
+            break;
+        }
+        struct mmsghdr h = {};
+        if (bpf_probe_read(&h , sizeof(h), (void *)(ctx->buf + offset))) {
+            return 0;
+        }
+        offset += sizeof(h);
+        trace_enter_write(ctx, ctx->fd, 0, (char*)h.msg_hdr.msg_iov, 0, h.msg_hdr.msg_iovlen);
+    }
+    return 0;
+}
+
 SEC("tracepoint/syscalls/sys_enter_sendto")
 int sys_enter_sendto(struct trace_event_raw_sys_enter_rw__stub* ctx) {
     return trace_enter_write(ctx, ctx->fd, 0, ctx->buf, ctx->size, 0);

+ 33 - 0
ebpftracer/l7/dns.go

@@ -0,0 +1,33 @@
+package l7
+
+import (
+	"strings"
+
+	"golang.org/x/net/dns/dnsmessage"
+	"inet.af/netaddr"
+)
+
+func ParseDns(payload []byte) (string, string, []netaddr.IP) {
+	var msg dnsmessage.Message
+	if err := msg.Unpack(payload); err != nil {
+		return "", "", nil
+	}
+	if len(msg.Questions) < 1 {
+		return "", "", nil
+	}
+	var ips []netaddr.IP
+	for _, a := range msg.Answers {
+		switch a.Header.Type {
+		case dnsmessage.TypeA:
+			if r, ok := a.Body.(*dnsmessage.AResource); ok {
+				ips = append(ips, netaddr.IPFrom4(r.A))
+			}
+		case dnsmessage.TypeAAAA:
+			if r, ok := a.Body.(*dnsmessage.AAAAResource); ok {
+				ips = append(ips, netaddr.IPFrom16(r.AAAA))
+			}
+		}
+	}
+	q := msg.Questions[0]
+	return q.Type.String(), strings.TrimSuffix(q.Name.String(), "."), ips
+}

+ 25 - 3
ebpftracer/l7/l7.go

@@ -8,6 +8,8 @@ import (
 type Protocol uint8
 
 const (
+	ProtocolTrace     Protocol = 200
+
 	ProtocolHTTP      Protocol = 1
 	ProtocolPostgres  Protocol = 2
 	ProtocolRedis     Protocol = 3
@@ -20,12 +22,14 @@ const (
 	ProtocolNats      Protocol = 10
 	ProtocolHTTP2     Protocol = 11
 	ProtocolDubbo2    Protocol = 12
+	ProtocolDNS       Protocol = 13
 
-	ProtocolTrace     Protocol = 200
 )
 
 func (p Protocol) String() string {
 	switch p {
+	case ProtocolTrace:
+		return "TRACE"
 	case ProtocolHTTP:
 		return "HTTP"
 	case ProtocolPostgres:
@@ -50,8 +54,8 @@ func (p Protocol) String() string {
 		return "HTTP2"
 	case ProtocolDubbo2:
 		return "Dubbo2"
-	case ProtocolTrace:
-		return "TRACE"
+	case ProtocolDNS:
+		return "DNS"
 	}
 	return "UNKNOWN:" + strconv.Itoa(int(p))
 }
@@ -112,6 +116,24 @@ func (s Status) Http() string {
 	return strconv.Itoa(int(s))
 }
 
+func (s Status) DNS() string {
+	switch s {
+	case 0:
+		return "ok"
+	case 1:
+		return "format_error"
+	case 2:
+		return "servfail"
+	case 3:
+		return "nxdomain"
+	case 4:
+		return "not_implemented"
+	case 5:
+		return "refused"
+	}
+	return ""
+}
+
 func (s Status) Error() bool {
 	return s == StatusFailed
 }

+ 5 - 3
ebpftracer/tracer.go

@@ -223,9 +223,11 @@ func (t *Tracer) ebpf(ch chan<- Event) error {
 	if len(prg) == 0 {
 		return fmt.Errorf("unsupported kernel version: %s", t.kernelVersion)
 	}
+	_, debugFsErr := os.Stat("/sys/kernel/debug/tracing")
+	_, traceFsErr := os.Stat("/sys/kernel/tracing")
 
-	if _, err := os.Stat("/sys/kernel/debug/tracing"); err != nil {
-		return fmt.Errorf("kernel tracing is not available: %w", err)
+	if debugFsErr != nil && traceFsErr != nil {
+		return fmt.Errorf("kernel tracing is not available: debugfs or tracefs must be mounted")
 	}
 
 	collectionSpec, err := ebpf.LoadCollectionSpecFromReader(bytes.NewReader(prg))
@@ -289,7 +291,7 @@ func (t *Tracer) ebpf(ch chan<- Event) error {
 		fmt.Println(programSpec.Name, programSpec.SectionName, programSpec.Type)
 		if t.disableL7Tracing {
 			switch programSpec.Name {
-			case "sys_enter_writev", "sys_enter_write", "sys_enter_sendto", "sys_enter_sendmsg":
+			case "sys_enter_writev", "sys_enter_write", "sys_enter_sendto", "sys_enter_sendmsg", "sys_enter_sendmmsg":
 				continue
 			case "sys_enter_read", "sys_enter_readv", "sys_enter_recvfrom", "sys_enter_recvmsg":
 				continue

+ 30 - 8
flags/flags.go

@@ -25,14 +25,15 @@ var (
 	LogPerSecond      = kingpin.Flag("log-per-second", "The number of logs per second").Default("10.0").Envar("LOG_PER_SECOND").Float64()
 	LogBurst          = kingpin.Flag("log-burst", "The maximum number of tokens that can be consumed in a single call to allow").Default("100").Envar("LOG_BURST").Int()
 
-	MetricsEndpoint  = kingpin.Flag("metrics-endpoint", "The URL of the endpoint to send metrics to").Envar("METRICS_ENDPOINT").URL()
-	TracesEndpoint   = kingpin.Flag("traces-endpoint", "The URL of the endpoint to send traces to").Envar("TRACES_ENDPOINT").URL()
-	LogsEndpoint     = kingpin.Flag("logs-endpoint", "The URL of the endpoint to send logs to").Envar("LOGS_ENDPOINT").URL()
-	ProfilesEndpoint = kingpin.Flag("profiles-endpoint", "The URL of the endpoint to send profiles to").Envar("PROFILES_ENDPOINT").URL()
-	ApiKey           = kingpin.Flag("api-key", "Coroot API key").Envar("API_KEY").String()
-	ScrapeInterval   = kingpin.Flag("scrape-interval", "How often to gather metrics from the agent").Default("15s").Envar("SCRAPE_INTERVAL").Duration()
-
-	WalDir = kingpin.Flag("wal-dir", "Path to where the agent stores data (e.g. the metrics Write-Ahead Log)").Default("/tmp/coroot-node-agent").Envar("WAL_DIR").String()
+	CollectorEndpoint = kingpin.Flag("collector-endpoint", "A base endpoint URL for metrics, traces, logs, and profiles").Envar("COLLECTOR_ENDPOINT").URL()
+	ApiKey            = kingpin.Flag("api-key", "Coroot API key").Envar("API_KEY").String()
+	MetricsEndpoint   = kingpin.Flag("metrics-endpoint", "The URL of the endpoint to send metrics to").Envar("METRICS_ENDPOINT").URL()
+	TracesEndpoint    = kingpin.Flag("traces-endpoint", "The URL of the endpoint to send traces to").Envar("TRACES_ENDPOINT").URL()
+	LogsEndpoint      = kingpin.Flag("logs-endpoint", "The URL of the endpoint to send logs to").Envar("LOGS_ENDPOINT").URL()
+	ProfilesEndpoint  = kingpin.Flag("profiles-endpoint", "The URL of the endpoint to send profiles to").Envar("PROFILES_ENDPOINT").URL()
+
+	ScrapeInterval = kingpin.Flag("scrape-interval", "How often to gather metrics from the agent").Default("15s").Envar("SCRAPE_INTERVAL").Duration()
+	WalDir         = kingpin.Flag("wal-dir", "Path to where the agent stores data (e.g. the metrics Write-Ahead Log)").Default("/tmp/coroot-node-agent").Envar("WAL_DIR").String()
 )
 
 func GetString(fl *string) string {
@@ -46,6 +47,27 @@ func init() {
 	if strings.HasSuffix(os.Args[0], ".test") {
 		return
 	}
+
 	kingpin.HelpFlag.Short('h').Hidden()
 	kingpin.Parse()
+
+	if *CollectorEndpoint != nil {
+		u := *CollectorEndpoint
+		if *MetricsEndpoint == nil {
+			*MetricsEndpoint = u.JoinPath("/v1/metrics")
+		}
+		if *TracesEndpoint == nil {
+			*TracesEndpoint = u.JoinPath("/v1/traces")
+		}
+		if *LogsEndpoint == nil {
+			*LogsEndpoint = u.JoinPath("/v1/logs")
+		}
+		if *ProfilesEndpoint == nil {
+			*ProfilesEndpoint = u.JoinPath("/v1/profiles")
+		}
+	}
+
+	if *MetricsEndpoint != nil {
+		*ListenAddress = "127.0.0.1:10300"
+	}
 }

+ 6 - 5
go.mod

@@ -34,9 +34,9 @@ require (
 	go.opentelemetry.io/otel/sdk v1.22.0
 	go.opentelemetry.io/otel/trace v1.22.0
 	golang.org/x/arch v0.4.0
-	golang.org/x/mod v0.14.0
-	golang.org/x/net v0.20.0
-	golang.org/x/sys v0.16.0
+	golang.org/x/mod v0.16.0
+	golang.org/x/net v0.22.0
+	golang.org/x/sys v0.18.0
 	golang.org/x/time v0.5.0
 	gopkg.in/alecthomas/kingpin.v2 v2.2.6
 	gopkg.in/yaml.v2 v2.4.0
@@ -119,6 +119,7 @@ require (
 	github.com/mdlayher/genetlink v1.3.2 // indirect
 	github.com/mdlayher/netlink v1.7.2 // indirect
 	github.com/mdlayher/socket v0.4.1 // indirect
+	github.com/miekg/dns v1.1.59 // indirect
 	github.com/mitchellh/mapstructure v1.5.0 // indirect
 	github.com/moby/locker v1.0.1 // indirect
 	github.com/moby/sys/mountinfo v0.6.2 // indirect
@@ -166,12 +167,12 @@ require (
 	go.uber.org/multierr v1.11.0 // indirect
 	go4.org/intern v0.0.0-20211027215823-ae77deb06f29 // indirect
 	go4.org/unsafe/assume-no-moving-gc v0.0.0-20230525183740-e7c30c78aeb2 // indirect
-	golang.org/x/crypto v0.18.0 // indirect
+	golang.org/x/crypto v0.21.0 // indirect
 	golang.org/x/exp v0.0.0-20240119083558-1b970713d09a // indirect
 	golang.org/x/oauth2 v0.16.0 // indirect
 	golang.org/x/sync v0.6.0 // indirect
 	golang.org/x/text v0.14.0 // indirect
-	golang.org/x/tools v0.17.0 // indirect
+	golang.org/x/tools v0.19.0 // indirect
 	google.golang.org/appengine v1.6.8 // indirect
 	google.golang.org/genproto/googleapis/api v0.0.0-20240116215550-a9fa1716bcac // indirect
 	google.golang.org/genproto/googleapis/rpc v0.0.0-20240116215550-a9fa1716bcac // indirect

+ 14 - 14
go.sum

@@ -794,8 +794,8 @@ github.com/mdlayher/socket v0.4.1 h1:eM9y2/jlbs1M615oshPQOHZzj6R6wMT7bX5NPiQvn2U
 github.com/mdlayher/socket v0.4.1/go.mod h1:cAqeGjoufqdxWkD7DkpyS+wcefOtmu5OQ8KuoJGIReA=
 github.com/mdlayher/taskstats v0.0.0-20230712191918-387b3d561d14 h1:eKehnW2s+3DQYZLAa/Pm04sk1G+k8LlZt0OUDbyYmrI=
 github.com/mdlayher/taskstats v0.0.0-20230712191918-387b3d561d14/go.mod h1:hDhp1SgOluLtKhnB65Wb/j3f7ghQWdOl+XIrbH9yqWc=
-github.com/miekg/dns v1.1.58 h1:ca2Hdkz+cDg/7eNF6V56jjzuZ4aCAE+DbVkILdQWG/4=
-github.com/miekg/dns v1.1.58/go.mod h1:Ypv+3b/KadlvW9vJfXOTf300O4UqaHFzFCuHz+rPkBY=
+github.com/miekg/dns v1.1.59 h1:C9EXc/UToRwKLhK5wKU/I4QVsBUc8kE6MkHBkeypWZs=
+github.com/miekg/dns v1.1.59/go.mod h1:nZpewl5p6IvctfgrckopVx2OlSEHPRO/U4SYkRklrEk=
 github.com/miekg/pkcs11 v1.0.3/go.mod h1:XsNlhZGX73bx86s2hdc/FuaLm2CPZJemRLMA+WTFxgs=
 github.com/mistifyio/go-zfs v2.1.2-0.20190413222219-f784269be439+incompatible/go.mod h1:8AuVvqP/mXw1px98n46wfvcGfQ4ci2FwoAjKYxuo3Z4=
 github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
@@ -1164,8 +1164,8 @@ golang.org/x/crypto v0.0.0-20210421170649-83a5a9bb288b/go.mod h1:T9bdIzuCu7OtxOm
 golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
 golang.org/x/crypto v0.0.0-20211108221036-ceb1ce70b4fa/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
 golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
-golang.org/x/crypto v0.18.0 h1:PGVlW0xEltQnzFZ55hkuX5+KLyrMYhHld1YHO4AKcdc=
-golang.org/x/crypto v0.18.0/go.mod h1:R0j02AL6hcrfOiy9T4ZYp/rcWeMxM3L6QYxlOuEG1mg=
+golang.org/x/crypto v0.21.0 h1:X31++rzVUdKhX5sWmSOFZxx8UW/ldWx55cbf08iNAMA=
+golang.org/x/crypto v0.21.0/go.mod h1:0BP7YvVV9gBbVKyeTG0Gyn+gZm94bibOW5BjDEYAOMs=
 golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
 golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
 golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8=
@@ -1203,8 +1203,8 @@ golang.org/x/mod v0.4.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
 golang.org/x/mod v0.4.1/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
 golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
 golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4=
-golang.org/x/mod v0.14.0 h1:dGoOF9QVLYng8IHTm7BAyWqCqSheQ5pYWGhzW00YJr0=
-golang.org/x/mod v0.14.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
+golang.org/x/mod v0.16.0 h1:QX4fJ0Rr5cPQCF7O9lh9Se4pmwfwskqZfq5moyldzic=
+golang.org/x/mod v0.16.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c=
 golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
 golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
 golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
@@ -1259,8 +1259,8 @@ golang.org/x/net v0.0.0-20210813160813-60bc85c4be6d/go.mod h1:9nx3DQGgdP8bBQD5qx
 golang.org/x/net v0.0.0-20210825183410-e898025ed96a/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
 golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
 golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
-golang.org/x/net v0.20.0 h1:aCL9BSgETF1k+blQaYUBx9hJ9LOGP3gAVemcZlf1Kpo=
-golang.org/x/net v0.20.0/go.mod h1:z8BVo6PvndSri0LbOE3hAn0apkU+1YvI6E70E9jsnvY=
+golang.org/x/net v0.22.0 h1:9sGLhx7iRIHEiX0oAJ3MRZMUCElJgy7Br1nO+AMN3Tc=
+golang.org/x/net v0.22.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg=
 golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
 golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
 golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
@@ -1390,12 +1390,12 @@ golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBc
 golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.16.0 h1:xWw16ngr6ZMtmxDyKyIgsE93KNKz5HKmMa3b8ALHidU=
-golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
+golang.org/x/sys v0.18.0 h1:DBdB3niSjOA/O0blCZBqDefyWNYveAYMNF1Wum0DYQ4=
+golang.org/x/sys v0.18.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
 golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
 golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
-golang.org/x/term v0.16.0 h1:m+B6fahuftsE9qjo0VWp2FW0mB3MTJvR0BaMQrq0pmE=
-golang.org/x/term v0.16.0/go.mod h1:yn7UURbUtPyrVJPGPq404EukNFxcm/foM+bV/bfcDsY=
+golang.org/x/term v0.18.0 h1:FcHjZXDMxI8mM3nwhX9HlKop4C0YQvCVCdwYl2wOtE8=
+golang.org/x/term v0.18.0/go.mod h1:ILwASektA3OnRv7amZ1xhE/KTR+u50pbXfZ03+6Nx58=
 golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
@@ -1482,8 +1482,8 @@ golang.org/x/tools v0.0.0-20210108195828-e2f9c7f1fc8e/go.mod h1:emZCQorbCU4vsT4f
 golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0=
 golang.org/x/tools v0.1.1/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk=
 golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc=
-golang.org/x/tools v0.17.0 h1:FvmRgNOcs3kOa+T20R1uhfP9F6HgG2mfxDv1vrx1Htc=
-golang.org/x/tools v0.17.0/go.mod h1:xsh6VxdV005rRVaS6SSAf9oiAqljS7UZUacMZ8Bnsps=
+golang.org/x/tools v0.19.0 h1:tfGCXNR1OsFG+sVdLAitlpjAvD/I6dHDKnYrpEZUHkw=
+golang.org/x/tools v0.19.0/go.mod h1:qoJWxmGSIBmAeriMx19ogtrEPrGtDbPK634QFIcLAhc=
 golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
 golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=

+ 255 - 0
install.sh

@@ -0,0 +1,255 @@
+#!/bin/sh
+set -e
+
+GITHUB_URL="https://github.com/coroot/coroot-node-agent/releases"
+DOWNLOADER=
+SUDO=sudo
+if [ $(id -u) -eq 0 ]; then
+    SUDO=
+fi
+
+BIN_DIR=/usr/bin
+SYSTEMD_DIR=/etc/systemd/system
+VERSION=
+SYSTEM_NAME=coroot-node-agent
+SYSTEMD_SERVICE=${SYSTEM_NAME}.service
+UNINSTALL_SH=${BIN_DIR}/${SYSTEM_NAME}-uninstall.sh
+FILE_SERVICE=${SYSTEMD_DIR}/${SYSTEMD_SERVICE}
+FILE_ENV=${SYSTEMD_DIR}/${SYSTEMD_SERVICE}.env
+ENV_VARS="^(LISTEN|CGROUPFS_ROOT|DISABLE_LOG_PARSING|DISABLE_PINGER|DISABLE_L7_TRACING|TRACK_PUBLIC_NETWORK|EPHEMERAL_PORT_RANGE|PROVIDER|REGION|AVAILABILITY_ZONE|INSTANCE_TYPE|INSTANCE_LIFE_CYCLE|LOG_PER_SECOND|LOG_BURST|COLLECTOR_ENDPOINT|API_KEY|METRICS_ENDPOINT|TRACES_ENDPOINT|LOGS_ENDPOINT|PROFILES_ENDPOINT|SCRAPE_INTERVAL|WAL_DIR)"
+
+info()
+{
+    echo '[INFO] ' "$@"
+}
+
+fatal()
+{
+    echo '[ERROR] ' "$@" >&2
+    exit 1
+}
+
+verify_system() {
+    if [ -x /bin/systemctl ] || type systemctl > /dev/null 2>&1; then
+        return
+    fi
+    fatal 'Cannot find systemd'
+}
+
+verify_executable() {
+    if [ ! -x ${BIN_DIR}/coroot-node-agent ]; then
+        fatal "Executable coroot-node-agent binary not found at ${BIN_DIR}/coroot-node-agent"
+    fi
+}
+
+verify_arch() {
+    if [ -z "$ARCH" ]; then
+        ARCH=$(uname -m)
+    fi
+    case $ARCH in
+        amd64)
+            ARCH=amd64
+            ;;
+        x86_64)
+            ARCH=amd64
+            ;;
+        arm64)
+            ARCH=arm64
+            ;;
+        aarch64)
+            ARCH=arm64
+            ;;
+        *)
+            fatal "Unsupported architecture $ARCH"
+    esac
+}
+
+verify_downloader() {
+    [ -x "$(command -v $1)" ] || return 1
+    DOWNLOADER=$1
+    return 0
+}
+
+setup_tmp() {
+    TMP_DIR=$(mktemp -d -t coroot-agent-install.XXXXXXXXXX)
+    TMP_BIN=${TMP_DIR}/coroot-node-agent
+    cleanup() {
+        code=$?
+        set +e
+        trap - EXIT
+        rm -rf ${TMP_DIR}
+        exit $code
+    }
+    trap cleanup INT EXIT
+}
+
+get_release_version() {
+    info "Finding the latest release"
+    latest_release_url=${GITHUB_URL}/latest
+    case $DOWNLOADER in
+        curl)
+            VERSION=$(curl -w '%{url_effective}' -L -s -S ${latest_release_url} -o /dev/null | sed -e 's|.*/||')
+            ;;
+        wget)
+            VERSION=$(wget -SqO /dev/null ${latest_release_url} 2>&1 | grep -i Location | sed -e 's|.*/||')
+            ;;
+        *)
+            fatal "Incorrect downloader executable '$DOWNLOADER'"
+            ;;
+    esac
+    info "The latest release is ${VERSION}"
+}
+
+download_binary() {
+    info "Downloading binary"
+    URL="${GITHUB_URL}/download/${VERSION}/coroot-node-agent-${ARCH}"
+    set +e
+    case $DOWNLOADER in
+        curl)
+            curl -o ${TMP_BIN} -sfL ${URL}
+            ;;
+        wget)
+            wget -qO ${TMP_BIN} ${URL}
+            ;;
+        *)
+            fatal "Incorrect executable '$DOWNLOADER'"
+            ;;
+    esac
+
+    [ $? -eq 0 ] || fatal 'Download failed'
+    set -e
+}
+
+setup_binary() {
+    chmod 755 ${TMP_BIN}
+    info "Installing coroot-node-agent to ${BIN_DIR}/coroot-node-agent"
+    $SUDO chown root:root ${TMP_BIN}
+    $SUDO mv -f ${TMP_BIN} ${BIN_DIR}/coroot-node-agent
+}
+
+download() {
+    verify_arch
+    verify_downloader curl || verify_downloader wget || fatal 'Can not find curl or wget for downloading files'
+    setup_tmp
+    get_release_version
+    download_binary
+    setup_binary
+}
+
+
+create_uninstall() {
+    info "Creating uninstall script ${UNINSTALL_SH}"
+    $SUDO tee ${UNINSTALL_SH} >/dev/null << EOF
+#!/bin/sh
+set -x
+[ \$(id -u) -eq 0 ] || exec sudo \$0 \$@
+
+systemctl stop ${SYSTEM_NAME}
+systemctl disable ${SYSTEM_NAME}
+systemctl reset-failed ${SYSTEM_NAME}
+systemctl daemon-reload
+
+rm -f ${FILE_SERVICE}
+rm -f ${FILE_ENV}
+
+remove_uninstall() {
+    rm -f ${UNINSTALL_SH}
+}
+trap remove_uninstall EXIT
+
+rm -rf /var/lib/coroot-node-agent || true
+rm -f ${BIN_DIR}/coroot-node-agent
+EOF
+    $SUDO chmod 755 ${UNINSTALL_SH}
+    $SUDO chown root:root ${UNINSTALL_SH}
+}
+
+systemd_disable() {
+    $SUDO systemctl disable ${SYSTEM_NAME} >/dev/null 2>&1 || true
+    $SUDO rm -f ${FILE_SERVICE} || true
+    $SUDO rm -f ${FILE_ENV} || true
+}
+
+create_env_file() {
+    info "env: Creating environment file ${FILE_ENV}"
+    $SUDO touch ${FILE_ENV}
+    $SUDO chmod 0600 ${FILE_ENV}
+    sh -c export | while read x v; do echo $v; done | grep -E ${ENV_VARS} | $SUDO tee ${FILE_ENV} >/dev/null
+}
+
+create_systemd_service_file() {
+    info "systemd: Creating service file ${FILE_SERVICE}"
+    $SUDO tee ${FILE_SERVICE} >/dev/null << EOF
+[Unit]
+Description=Coroot node agent
+Documentation=https://coroot.com
+Wants=network-online.target
+After=network-online.target
+
+[Install]
+WantedBy=multi-user.target
+
+[Service]
+Type=exec
+EnvironmentFile=-/etc/default/%N
+EnvironmentFile=-/etc/sysconfig/%N
+EnvironmentFile=-${FILE_ENV}
+KillMode=process
+Delegate=yes
+# Having non-zero Limit*s causes performance problems due to accounting overhead
+# in the kernel. We recommend using cgroups to do container-local accounting.
+LimitNOFILE=1048576
+LimitNPROC=infinity
+LimitCORE=infinity
+TasksMax=infinity
+TimeoutStartSec=0
+Restart=always
+RestartSec=5s
+ExecStart=${BIN_DIR}/coroot-node-agent
+EOF
+}
+
+create_service_file() {
+    create_systemd_service_file
+    return 0
+}
+
+get_installed_hashes() {
+    $SUDO sha256sum ${BIN_DIR}/coroot-node-agent ${FILE_SERVICE} ${FILE_ENV} 2>&1 || true
+}
+
+systemd_enable() {
+    info "systemd: Enabling ${SYSTEM_NAME} unit"
+    $SUDO systemctl enable ${FILE_SERVICE} >/dev/null
+    $SUDO systemctl daemon-reload >/dev/null
+}
+
+systemd_start() {
+    info "systemd: Starting ${SYSTEM_NAME}"
+    $SUDO systemctl restart ${SYSTEM_NAME}
+}
+
+
+service_enable_and_start() {
+    systemd_enable
+
+    POST_INSTALL_HASHES=$(get_installed_hashes)
+    if [ "${PRE_INSTALL_HASHES}" = "${POST_INSTALL_HASHES}" ]; then
+        info 'No change detected so skipping service start'
+        return
+    fi
+
+    systemd_start
+
+    return 0
+}
+
+{
+    verify_system
+    download
+    create_uninstall
+    systemd_disable
+    create_env_file
+    create_service_file
+    service_enable_and_start
+}

+ 2 - 2
profiling/profiling.go

@@ -54,7 +54,6 @@ func Init(hostId, hostName string) chan<- containers.ProcessInfo {
 	constLabels = labels.Labels{
 		{Name: "host.name", Value: hostName},
 		{Name: "host.id", Value: hostId},
-		{Name: "profile.source", Value: "ebpf"},
 	}
 
 	reg := prometheus.NewRegistry()
@@ -167,6 +166,7 @@ func upload(b *pprof.ProfileBuilder) error {
 	}
 	u.RawQuery = q.Encode()
 
+	b.Profile.SampleType[0].Type = "ebpf:cpu:nanoseconds"
 	b.Profile.DurationNanos = CollectInterval.Nanoseconds()
 	body := bytes.NewBuffer(nil)
 	_, err := b.Write(body)
@@ -174,7 +174,7 @@ func upload(b *pprof.ProfileBuilder) error {
 		return err
 	}
 
-	req, err := http.NewRequest("POST", u.String(), body)
+	req, err := http.NewRequest(http.MethodPost, u.String(), body)
 	if err != nil {
 		return err
 	}

+ 7 - 6
prom/agent.go

@@ -5,6 +5,7 @@ import (
 
 	"github.com/coroot/coroot-node-agent/common"
 	"github.com/coroot/coroot-node-agent/flags"
+	"github.com/go-kit/log/level"
 	"github.com/prometheus/client_golang/prometheus"
 	promConfig "github.com/prometheus/common/config"
 	"github.com/prometheus/common/model"
@@ -25,12 +26,12 @@ const (
 )
 
 func StartAgent(machineId string) error {
-	l := Logger{}
+	logger := level.NewFilter(Logger{}, level.AllowInfo())
 
 	if *flags.MetricsEndpoint == nil {
 		return nil
 	}
-	klog.Infoln("Metrics remote write endpoint:", (*flags.MetricsEndpoint).String())
+	klog.Infoln("metrics remote write endpoint:", (*flags.MetricsEndpoint).String())
 	cfg := config.DefaultConfig
 	cfg.GlobalConfig.ScrapeInterval = model.Duration(*flags.ScrapeInterval)
 	cfg.GlobalConfig.ScrapeTimeout = model.Duration(*flags.ScrapeInterval)
@@ -54,14 +55,14 @@ func StartAgent(machineId string) error {
 	opts := agent.DefaultOptions()
 	localStorage := &readyStorage{stats: tsdb.NewDBStats()}
 	scraper := &readyScrapeManager{}
-	remoteStorage := remote.NewStorage(l, prometheus.DefaultRegisterer, localStorage.StartTime, *flags.WalDir, RemoteFlushDeadline, scraper)
-	fanoutStorage := storage.NewFanout(l, localStorage, remoteStorage)
+	remoteStorage := remote.NewStorage(logger, prometheus.DefaultRegisterer, localStorage.StartTime, *flags.WalDir, RemoteFlushDeadline, scraper)
+	fanoutStorage := storage.NewFanout(logger, localStorage, remoteStorage)
 
 	if err := remoteStorage.ApplyConfig(&cfg); err != nil {
 		return err
 	}
 
-	scrapeManager, err := scrape.NewManager(nil, l, fanoutStorage, prometheus.DefaultRegisterer)
+	scrapeManager, err := scrape.NewManager(nil, logger, fanoutStorage, prometheus.DefaultRegisterer)
 	if err != nil {
 		return err
 	}
@@ -69,7 +70,7 @@ func StartAgent(machineId string) error {
 		return err
 	}
 	scraper.Set(scrapeManager)
-	db, err := agent.Open(l, prometheus.DefaultRegisterer, remoteStorage, *flags.WalDir, opts)
+	db, err := agent.Open(logger, prometheus.DefaultRegisterer, remoteStorage, *flags.WalDir, opts)
 	if err != nil {
 		return err
 	}