tracer.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625
  1. package ebpftracer
  2. import "C"
  3. import (
  4. "bytes"
  5. "encoding/binary"
  6. "errors"
  7. "fmt"
  8. "github.com/cilium/ebpf"
  9. "github.com/cilium/ebpf/link"
  10. "github.com/cilium/ebpf/perf"
  11. "github.com/coroot/coroot-node-agent/common"
  12. "github.com/coroot/coroot-node-agent/ebpftracer/l7"
  13. "github.com/coroot/coroot-node-agent/ebpftracer/tracer"
  14. "github.com/coroot/coroot-node-agent/proc"
  15. "golang.org/x/mod/semver"
  16. "golang.org/x/sys/unix"
  17. "inet.af/netaddr"
  18. "k8s.io/klog/v2"
  19. "os"
  20. "runtime"
  21. "strconv"
  22. "strings"
  23. "time"
  24. "unsafe"
  25. )
  26. /*
  27. #define TASK_COMM_LEN 16
  28. #define BURST_DATA_BUF_SIZE 8192 // For brust send buffer
  29. #include <linux/types.h>
  30. struct __tuple_t {
  31. __u8 daddr[16];
  32. __u8 rcv_saddr[16];
  33. __u8 addr_len;
  34. __u8 l4_protocol;
  35. __u16 dport;
  36. __u16 num;
  37. };
  38. struct __socket_data {
  39. __u32 pid;
  40. __u32 tgid;
  41. __u64 coroutine_id;
  42. __u8 source;
  43. __u8 comm[TASK_COMM_LEN];
  44. __u64 socket_id;
  45. struct __tuple_t tuple;
  46. __u32 extra_data;
  47. __u32 extra_data_count;
  48. __u32 tcp_seq;
  49. __u64 thread_trace_id;
  50. __u64 timestamp;
  51. __u8 direction: 1;
  52. __u8 msg_type: 7;
  53. __u64 syscall_len;
  54. __u64 data_seq;
  55. __u16 data_type;
  56. __u16 data_len;
  57. char data[BURST_DATA_BUF_SIZE];
  58. } __attribute__((packed));
  59. struct __socket_data_buffer {
  60. __u32 events_num;
  61. __u32 len;
  62. char data[32760];
  63. };
  64. */
  65. import "C"
  66. type SocketData C.struct___socket_data
  67. type SocketDataBuffer C.struct___socket_data_buffer
  68. const MaxPayloadSize = 1024
  69. type EventType uint32
  70. type EventReason uint32
  71. const (
  72. EventTypeProcessStart EventType = 1
  73. EventTypeProcessExit EventType = 2
  74. EventTypeConnectionOpen EventType = 3
  75. EventTypeConnectionClose EventType = 4
  76. EventTypeConnectionError EventType = 5
  77. EventTypeListenOpen EventType = 6
  78. EventTypeListenClose EventType = 7
  79. EventTypeFileOpen EventType = 8
  80. EventTypeTCPRetransmit EventType = 9
  81. EventTypeL7Request EventType = 10
  82. EventReasonNone EventReason = 0
  83. EventReasonOOMKill EventReason = 1
  84. )
  85. type Event struct {
  86. Type EventType
  87. Reason EventReason
  88. Pid uint32
  89. SrcAddr netaddr.IPPort
  90. DstAddr netaddr.IPPort
  91. Fd uint64
  92. Timestamp uint64
  93. L7Request *l7.RequestData
  94. }
  95. type perfMapType uint8
  96. const (
  97. perfMapTypeProcEvents perfMapType = 1
  98. perfMapTypeTCPEvents perfMapType = 2
  99. perfMapTypeFileEvents perfMapType = 3
  100. perfMapTypeL7Events perfMapType = 4
  101. perfMapTypeSocketEvents perfMapType = 5
  102. )
  103. type Tracer struct {
  104. kernelVersion string
  105. disableL7Tracing bool
  106. collection *ebpf.Collection
  107. readers map[string]*perf.Reader
  108. links []link.Link
  109. uprobes map[string]*ebpf.Program
  110. }
  111. func NewTracer(kernelVersion string, disableL7Tracing bool) *Tracer {
  112. if disableL7Tracing {
  113. klog.Infoln("L7 tracing is disabled")
  114. }
  115. return &Tracer{
  116. kernelVersion: kernelVersion,
  117. disableL7Tracing: disableL7Tracing,
  118. readers: map[string]*perf.Reader{},
  119. uprobes: map[string]*ebpf.Program{},
  120. }
  121. }
  122. func (t *Tracer) Run(events chan<- Event) error {
  123. if err := t.ebpf(events); err != nil {
  124. return err
  125. }
  126. if err := t.init(events); err != nil {
  127. return err
  128. }
  129. return nil
  130. }
  131. func (t *Tracer) Close() {
  132. for _, p := range t.uprobes {
  133. _ = p.Close()
  134. }
  135. for _, l := range t.links {
  136. _ = l.Close()
  137. }
  138. for _, r := range t.readers {
  139. _ = r.Close()
  140. }
  141. t.collection.Close()
  142. }
  143. func (t *Tracer) init(ch chan<- Event) error {
  144. pids, err := proc.ListPids()
  145. if err != nil {
  146. return fmt.Errorf("failed to list pids: %w", err)
  147. }
  148. for _, pid := range pids {
  149. ch <- Event{Type: EventTypeProcessStart, Pid: pid}
  150. }
  151. fds, sockets := readFds(pids)
  152. for _, fd := range fds {
  153. ch <- Event{Type: EventTypeFileOpen, Pid: fd.pid, Fd: fd.fd}
  154. }
  155. listens := map[uint64]bool{}
  156. for _, s := range sockets {
  157. if s.Listen {
  158. listens[uint64(s.pid)<<32|uint64(s.SAddr.Port())] = true
  159. }
  160. }
  161. for _, s := range sockets {
  162. typ := EventTypeConnectionOpen
  163. if s.Listen {
  164. typ = EventTypeListenOpen
  165. } else if listens[uint64(s.pid)<<32|uint64(s.SAddr.Port())] || s.DAddr.Port() > s.SAddr.Port() { // inbound
  166. continue
  167. }
  168. ch <- Event{
  169. Type: typ,
  170. Pid: s.pid,
  171. Fd: s.fd,
  172. SrcAddr: s.SAddr,
  173. DstAddr: s.DAddr,
  174. }
  175. }
  176. return nil
  177. }
  178. type perfMap struct {
  179. name string
  180. perCPUBufferSizePages int
  181. typ perfMapType
  182. }
  183. func (t *Tracer) ebpf(ch chan<- Event) error {
  184. if _, ok := ebpfProg[runtime.GOARCH]; !ok {
  185. return fmt.Errorf("unsupported architecture: %s", runtime.GOARCH)
  186. }
  187. kv := "v" + common.KernelMajorMinor(t.kernelVersion)
  188. var prg []byte
  189. for _, p := range ebpfProg[runtime.GOARCH] {
  190. if semver.Compare(kv, p.v) >= 0 {
  191. prg = p.p
  192. break
  193. }
  194. }
  195. if len(prg) == 0 {
  196. return fmt.Errorf("unsupported kernel version: %s", t.kernelVersion)
  197. }
  198. if _, err := os.Stat("/sys/kernel/debug/tracing"); err != nil {
  199. return fmt.Errorf("kernel tracing is not available: %w", err)
  200. }
  201. collectionSpec, err := ebpf.LoadCollectionSpecFromReader(bytes.NewReader(prg))
  202. if err != nil {
  203. return fmt.Errorf("failed to load collection spec: %w", err)
  204. }
  205. _ = unix.Setrlimit(unix.RLIMIT_MEMLOCK, &unix.Rlimit{Cur: unix.RLIM_INFINITY, Max: unix.RLIM_INFINITY})
  206. tracer.PidFilter(collectionSpec)
  207. opts := &ebpf.CollectionOptions{MapReplacements: make(map[string]*ebpf.Map)}
  208. for _, spec := range collectionSpec.Maps {
  209. fmt.Println("maps:", spec.Name)
  210. }
  211. tracer.MapInit(collectionSpec, opts)
  212. c, err := ebpf.NewCollectionWithOptions(collectionSpec, *opts)
  213. if err != nil {
  214. var verr *ebpf.VerifierError
  215. if errors.As(err, &verr) {
  216. klog.Errorf("%+v", verr)
  217. }
  218. return fmt.Errorf("failed to load collection: %w", err)
  219. }
  220. tracer.Offset()
  221. t.collection = c
  222. perfMaps := []perfMap{
  223. {name: "proc_events", typ: perfMapTypeProcEvents, perCPUBufferSizePages: 4},
  224. {name: "tcp_listen_events", typ: perfMapTypeTCPEvents, perCPUBufferSizePages: 4},
  225. {name: "tcp_connect_events", typ: perfMapTypeTCPEvents, perCPUBufferSizePages: 8},
  226. {name: "tcp_retransmit_events", typ: perfMapTypeTCPEvents, perCPUBufferSizePages: 4},
  227. {name: "file_events", typ: perfMapTypeFileEvents, perCPUBufferSizePages: 4},
  228. }
  229. fmt.Println(len(collectionSpec.Programs))
  230. fmt.Println(len(c.Programs))
  231. tracer.MapInsert(c)
  232. //os.Exit(1)
  233. if !t.disableL7Tracing {
  234. perfMaps = append(perfMaps, perfMap{name: "l7_events", typ: perfMapTypeL7Events, perCPUBufferSizePages: 32})
  235. }
  236. perfMaps = append(perfMaps, perfMap{name: tracer.MAP_PERF_SOCKET_DATA_NAME, typ: perfMapTypeSocketEvents, perCPUBufferSizePages: 64})
  237. fmt.Println("perfMaps start --")
  238. for _, pm := range perfMaps {
  239. fmt.Println(pm.name)
  240. r, err := perf.NewReader(t.collection.Maps[pm.name], pm.perCPUBufferSizePages*os.Getpagesize())
  241. if err != nil {
  242. t.Close()
  243. return fmt.Errorf("failed to create ebpf reader: %w", err)
  244. }
  245. t.readers[pm.name] = r
  246. // event监听
  247. go runEventsReader(pm.name, r, ch, pm.typ)
  248. }
  249. fmt.Println("perfMaps end --")
  250. for _, programSpec := range collectionSpec.Programs {
  251. program := t.collection.Programs[programSpec.Name]
  252. fmt.Println(programSpec.Name, programSpec.SectionName, programSpec.Type)
  253. if t.disableL7Tracing {
  254. switch programSpec.Name {
  255. case "sys_enter_writev", "sys_enter_write", "sys_enter_sendto", "sys_enter_sendmsg":
  256. continue
  257. case "sys_enter_read", "sys_enter_readv", "sys_enter_recvfrom", "sys_enter_recvmsg":
  258. continue
  259. case "sys_exit_read", "sys_exit_readv", "sys_exit_recvfrom", "sys_exit_recvmsg":
  260. continue
  261. }
  262. }
  263. var l link.Link
  264. switch programSpec.Type {
  265. case ebpf.TracePoint:
  266. if strings.Contains(programSpec.SectionName, "prog") {
  267. continue
  268. }
  269. parts := strings.SplitN(programSpec.AttachTo, "/", 2)
  270. l, err = link.Tracepoint(parts[0], parts[1], program, nil)
  271. case ebpf.Kprobe:
  272. if strings.HasPrefix(programSpec.SectionName, "uprobe/") {
  273. fmt.Println("==============uprobe s")
  274. fmt.Println(programSpec.Name, programSpec.SectionName, programSpec.Type)
  275. fmt.Println("==============uprobe e")
  276. t.uprobes[programSpec.Name] = program
  277. continue
  278. }
  279. l, err = link.Kprobe(programSpec.AttachTo, program, nil)
  280. }
  281. if err != nil {
  282. t.Close()
  283. return fmt.Errorf("failed to link program: %w", err)
  284. }
  285. t.links = append(t.links, l)
  286. }
  287. return nil
  288. }
  289. func (t EventType) String() string {
  290. switch t {
  291. case EventTypeProcessStart:
  292. return "process-start"
  293. case EventTypeProcessExit:
  294. return "process-exit"
  295. case EventTypeConnectionOpen:
  296. return "connection-open"
  297. case EventTypeConnectionClose:
  298. return "connection-close"
  299. case EventTypeConnectionError:
  300. return "connection-error"
  301. case EventTypeListenOpen:
  302. return "listen-open"
  303. case EventTypeListenClose:
  304. return "listen-close"
  305. case EventTypeFileOpen:
  306. return "file-open"
  307. case EventTypeTCPRetransmit:
  308. return "tcp-retransmit"
  309. case EventTypeL7Request:
  310. return "l7-request"
  311. }
  312. return "unknown: " + strconv.Itoa(int(t))
  313. }
  314. func (t EventReason) String() string {
  315. switch t {
  316. case EventReasonNone:
  317. return "none"
  318. case EventReasonOOMKill:
  319. return "oom-kill"
  320. }
  321. return "unknown: " + strconv.Itoa(int(t))
  322. }
  323. type procEvent struct {
  324. Type EventType
  325. Pid uint32
  326. Reason uint32
  327. }
  328. type tcpEvent struct {
  329. Fd uint64
  330. Timestamp uint64
  331. Type EventType
  332. Pid uint32
  333. SPort uint16
  334. DPort uint16
  335. SAddr [16]byte
  336. DAddr [16]byte
  337. }
  338. type fileEvent struct {
  339. Type EventType
  340. Pid uint32
  341. Fd uint64
  342. }
  343. type l7Event struct {
  344. Fd uint64
  345. ConnectionTimestamp uint64
  346. Pid uint32
  347. Status uint32
  348. Duration uint64
  349. Protocol uint8
  350. Method uint8
  351. Padding uint16
  352. StatementId uint32
  353. PayloadSize uint64
  354. TraceId uint64
  355. TraceStart uint32
  356. TraceEnd uint32
  357. }
  358. type SocketDataBufferddd struct {
  359. EventsNum uint32
  360. Len uint32
  361. Data [32760]byte
  362. }
  363. const (
  364. TASK_COMM_LEN = 16
  365. BURST_DATA_BUF_SIZE = 8192
  366. )
  367. type Tuple struct {
  368. Daddr [16]uint8
  369. RcvSaddr [16]uint8
  370. AddrLen uint8
  371. L4Protocol uint8
  372. Dport uint16
  373. Num uint16
  374. }
  375. type SocketDatadddd struct {
  376. Pid uint32 // 表示线程号 如果'pid == tgid'表示一个进程, 否则是线程
  377. Tgid uint32 // 进程号
  378. CoroutineID uint64
  379. Source uint8
  380. Comm [TASK_COMM_LEN]byte
  381. SocketID uint64
  382. Tuple Tuple
  383. ExtraData uint32
  384. ExtraDataCount uint32
  385. TcpSeq uint32
  386. ThreadTraceID uint64
  387. Timestamp uint64
  388. Direction uint8
  389. MsgType uint8
  390. SyscallLen uint64
  391. DataSeq uint64
  392. DataType uint16
  393. DataLen uint16
  394. Data [BURST_DATA_BUF_SIZE]byte
  395. }
  396. func runEventsReader(name string, r *perf.Reader, ch chan<- Event, typ perfMapType) {
  397. for {
  398. rec, err := r.Read()
  399. if err != nil {
  400. if errors.Is(err, perf.ErrClosed) {
  401. break
  402. }
  403. continue
  404. }
  405. if rec.LostSamples > 0 {
  406. klog.Errorln(name, "lost samples:", rec.LostSamples)
  407. continue
  408. }
  409. var event Event
  410. switch typ {
  411. case perfMapTypeSocketEvents:
  412. //fmt.Println("perfMapTypeSocketEvents")
  413. //// 假设 rec.RawSample 包含数据,类型为 []byte
  414. //rawData := rec.RawSample
  415. //fmt.Println("perfMapTypeSocketEvents2")
  416. //
  417. //// 创建一个 SocketDataBuffer 结构体实例
  418. //var buffer SocketDataBuffer
  419. //
  420. //// 创建一个字节缓冲区,并将数据填充到其中
  421. //reader := bytes.NewReader(rawData)
  422. //fmt.Println("perfMapTypeSocketEvents3")
  423. //fmt.Println(len(rawData))
  424. //// 使用 binary.Read 函数读取数据并解析为 SocketDataBuffer 结构体实例
  425. //if err := binary.Read(reader, binary.LittleEndian, &buffer); err != nil {
  426. // fmt.Println(reader.Len())
  427. // fmt.Println("Failed to read data:", err)
  428. // continue
  429. //}
  430. //fmt.Println("perfMapTypeSocketEvents4")
  431. //
  432. //// 打印解析后的数据
  433. //fmt.Println("EventsNum:", buffer.EventsNum)
  434. //fmt.Println("Len:", buffer.Len)
  435. //
  436. //// 打印 char data 的内容
  437. //fmt.Printf("Data: %s\n", string(buffer.Data[:buffer.Len])) // 仅打印实际长度的数据
  438. //socketDataBuffer := rec.RawSample
  439. buf := (*SocketDataBuffer)(unsafe.Pointer(&rec.RawSample[0])) //nolint:gosec
  440. socketData := (*SocketData)(unsafe.Pointer(&buf.data[0])) //nolint:gosec
  441. //socketData := (*(*[128]byte)(unsafe.Pointer(&eventC.line)))
  442. //dataPtr := unsafe.Pointer(&buf.data[0])
  443. //socketData := (*SocketData)(dataPtr)
  444. //reader2 := bytes.NewBuffer(rec.RawSample)
  445. // 222222
  446. //fmt.Println("socketData.Pid:", socketData.pid)
  447. //fmt.Println("socketData.Tgid:", socketData.tgid)
  448. //fmt.Println("socketData.CoroutineID:", socketData.coroutine_id)
  449. //fmt.Println("socketData.Source:", socketData.source)
  450. //
  451. //fmt.Printf("socketData.Comm: %s \n", socketData.comm)
  452. //fmt.Printf("socketData.SocketID :%v \n", socketData.socket_id)
  453. //fmt.Println("socketData.Tuple:", socketData.Tuple)
  454. //fmt.Println("socketData.ExtraData:", socketData.ExtraData)
  455. //fmt.Println("socketData.ExtraDataCount:", socketData.ExtraDataCount)
  456. //fmt.Println("socketData.TCPSeq:", socketData.TcpSeq)
  457. //fmt.Println("socketData.ThreadTraceID:", socketData.ThreadTraceID)
  458. //fmt.Println("socketData.Timestamp:", socketData.Timestamp)
  459. //fmt.Println("socketData.Direction:", socketData.Direction)
  460. //fmt.Println("socketData.MsgType:", socketData.MsgType)
  461. //fmt.Println("socketData.SyscallLen:", socketData.SyscallLen)
  462. //fmt.Println("socketData.DataSeq:", socketData.DataSeq)
  463. fmt.Printf("socketData.DataType:%d \n", (socketData.data_type))
  464. fmt.Printf("socketData.DataLen:%d \n", (socketData.data_len))
  465. //fmt.Println("socketData.Data:", len(socketData.Data))
  466. //socketData := &SocketData{}
  467. //reader := bytes.NewBuffer(rec.RawSample)
  468. //if err := binary.Read(reader, binary.LittleEndian, v); err != nil {
  469. // klog.Warningln("failed1 to read msg:", err)
  470. // continue
  471. //}
  472. //
  473. //var data []byte
  474. //payload := reader.Bytes()
  475. //switch {
  476. //case v.Len == 0:
  477. //case v.Len > 32760:
  478. // data = payload[:32760]
  479. //default:
  480. // data = payload[:v.Len]
  481. //}
  482. //////data2 := data[:v.Len]
  483. ////fmt.Println("perfMapTypeSocketEvents")
  484. //fmt.Println(v.EventsNum)
  485. //fmt.Println(v.Len)
  486. //fmt.Println(string(data))
  487. //
  488. //var data2 SocketData
  489. //reader2 := bytes.NewBuffer(data)
  490. //if err := binary.Read(reader2, binary.LittleEndian, data2); err != nil {
  491. // klog.Warningln("failed2 to read msg:", err)
  492. // continue
  493. //}
  494. //
  495. //fmt.Println(data2.Pid)
  496. //fmt.Println(data2.Tgid)
  497. //fmt.Println(string(v.Data))
  498. //continue
  499. case perfMapTypeL7Events:
  500. v := &l7Event{}
  501. reader := bytes.NewBuffer(rec.RawSample)
  502. if err := binary.Read(reader, binary.LittleEndian, v); err != nil {
  503. klog.Warningln("failed to read msg:", err)
  504. continue
  505. }
  506. payload := reader.Bytes()
  507. req := &l7.RequestData{
  508. Protocol: l7.Protocol(v.Protocol),
  509. Status: l7.Status(v.Status),
  510. Duration: time.Duration(v.Duration),
  511. Method: l7.Method(v.Method),
  512. StatementId: v.StatementId,
  513. TraceId: v.TraceId,
  514. TraceStart: v.TraceStart,
  515. TraceEnd: v.TraceEnd,
  516. }
  517. switch {
  518. case v.PayloadSize == 0:
  519. case v.PayloadSize > MaxPayloadSize:
  520. req.Payload = payload[:MaxPayloadSize]
  521. default:
  522. req.Payload = payload[:v.PayloadSize]
  523. }
  524. fmt.Println("==========")
  525. fmt.Println("req.Payload:", string(req.Payload))
  526. fmt.Println("==========")
  527. event = Event{Type: EventTypeL7Request, Pid: v.Pid, Fd: v.Fd, Timestamp: v.ConnectionTimestamp, L7Request: req}
  528. case perfMapTypeFileEvents:
  529. v := &fileEvent{}
  530. if err := binary.Read(bytes.NewBuffer(rec.RawSample), binary.LittleEndian, v); err != nil {
  531. klog.Warningln("failed to read msg:", err)
  532. continue
  533. }
  534. event = Event{Type: v.Type, Pid: v.Pid, Fd: v.Fd}
  535. case perfMapTypeProcEvents:
  536. v := &procEvent{}
  537. if err := binary.Read(bytes.NewBuffer(rec.RawSample), binary.LittleEndian, v); err != nil {
  538. klog.Warningln("failed to read msg:", err)
  539. continue
  540. }
  541. event = Event{Type: v.Type, Reason: EventReason(v.Reason), Pid: v.Pid}
  542. case perfMapTypeTCPEvents:
  543. v := &tcpEvent{}
  544. if err := binary.Read(bytes.NewBuffer(rec.RawSample), binary.LittleEndian, v); err != nil {
  545. klog.Warningln("failed to read msg:", err)
  546. continue
  547. }
  548. event = Event{
  549. Type: v.Type,
  550. Pid: v.Pid,
  551. SrcAddr: ipPort(v.SAddr, v.SPort),
  552. DstAddr: ipPort(v.DAddr, v.DPort),
  553. Fd: v.Fd,
  554. Timestamp: v.Timestamp,
  555. }
  556. default:
  557. continue
  558. }
  559. ch <- event
  560. }
  561. }
  562. func ipPort(ip [16]byte, port uint16) netaddr.IPPort {
  563. i, _ := netaddr.FromStdIP(ip[:])
  564. return netaddr.IPPortFrom(i, port)
  565. }