socket_trace.c 68 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232
  1. /*
  2. * Copyright (c) 2022 Yunshan Networks
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include "config.h"
  17. #include "include/socket_trace.h"
  18. #include "include/task_struct_utils.h"
  19. #define OFFSET_READY 1
  20. #define OFFSET_NO_READY 0
  21. #define NS_PER_US 1000ULL
  22. #define NS_PER_SEC 1000000000ULL
  23. #define PROTO_INFER_CACHE_SIZE 80
  24. #define SUBMIT_OK (0)
  25. #define SUBMIT_INVALID (-1)
  26. #define SUBMIT_ABORT (-2)
  27. /***********************************************************
  28. * map definitions
  29. ***********************************************************/
  30. /*
  31. * 向用户态传递数据的专用map
  32. */
  33. MAP_PERF_EVENT(socket_data, int, __u32, MAX_CPU)
  34. /*
  35. * Why use two Tail Calls jmp tables ?
  36. *
  37. * struct bpf_array { ... enum bpf_prog_type owner_prog_type}
  38. * 'ownership' of prog_array is claimed by the first program that
  39. * is going to use this map or by the first program which FD is stored
  40. * in the map to make sure that all callers and callees have the same
  41. * prog_type and JITed flag.
  42. *
  43. * Tail Calls jmp table
  44. * We divide the data processing and data output into two parts, and each has a -
  45. * different eBPF program for processing.
  46. * The purpose of this is to prevent the problem of the number of instructions exceeding max limit.
  47. *
  48. * 'progs_jmp_kp_map' for kprobe/uprobe (`A -> B`, both A and B are [k/u]probe program)
  49. * 'progs_jmp_tp_map' for tracepoint (`A -> B`, both A and B are tracepoint program)
  50. */
  51. MAP_PROG_ARRAY(progs_jmp_kp_map, __u32, __u32, PROG_KP_NUM)
  52. MAP_PROG_ARRAY(progs_jmp_tp_map, __u32, __u32, PROG_TP_NUM)
  53. MAP_PROG_ARRAY(progs_jmp_up_map, __u32, __u32, PROG_UP_NUM)
  54. /*
  55. * 因为ebpf栈只有512字节无法存放http数据,这里使用map做为buffer。
  56. */
  57. MAP_PERARRAY(data_buf, __u32, struct __socket_data_buffer, 1)
  58. /*
  59. * For protocol infer buffer
  60. */
  61. struct ctx_info_s {
  62. union {
  63. struct infer_data_s {
  64. __u32 len;
  65. char data[64];
  66. } infer_buf;
  67. struct tail_calls_context tail_call;
  68. };
  69. };
  70. MAP_PERARRAY(ctx_info, __u32, struct ctx_info_s, 1)
  71. /*
  72. * 结构体成员偏移
  73. */
  74. MAP_PERARRAY(members_offset, __u32, struct member_fields_offset, 1)
  75. /*
  76. * 记录追踪各种ID值(确保唯一性, per CPU 没有使用锁)
  77. * 生成方法:
  78. * 1、先初始化一个基值(基值 = [CPU IDX: 8bit] + [ sys_boot_time ])
  79. * 2、在基值的基础上递增
  80. * CPU IDX: 8bit [0 - 255]个CPU。
  81. * sys_boot_time: 56bit 使用1970年1月1日00:00:00开始到现在纳秒时间/100
  82. *
  83. * 按照每秒钟处理 10,000,000 Requests (这是一个很大值,实际达不到)这样的一个速率,
  84. * 可以存储176年(如果从2022年开始)的数据而UID不会出现重复。
  85. * ((2^56 - 1) - sys_boot_time)/10/1000/1000/60/60/24/365 = 176 years
  86. */
  87. MAP_PERARRAY(trace_conf_map, __u32, struct trace_conf_t, 1)
  88. /*
  89. * 对各类map进行统计
  90. */
  91. MAP_ARRAY(trace_stats_map, __u32, struct trace_stats, 1)
  92. // key: protocol id, value: is protocol enabled, size: PROTO_NUM
  93. MAP_ARRAY(protocol_filter, int, int, PROTO_NUM)
  94. MAP_ARRAY(allow_port_bitmap, __u32, struct allow_port_bitmap, 1)
  95. // write() syscall's input argument.
  96. // Key is {tgid, pid}.
  97. BPF_HASH(active_write_args_map, __u64, struct data_args_t)
  98. // read() syscall's input argument.
  99. // Key is {tgid, pid}.
  100. BPF_HASH(active_read_args_map, __u64, struct data_args_t)
  101. // socket_info_map, 这是个hash表,用于记录socket信息,
  102. // Key is {pid + fd}. value is struct socket_info_t
  103. BPF_HASH(socket_info_map, __u64, struct socket_info_t)
  104. // Key is struct trace_key_t. value is trace_info_t
  105. BPF_HASH(trace_map, struct trace_key_t, struct trace_info_t)
  106. // Stores the identity used to fit the kernel, key: 0, vlaue:{tgid, pid}
  107. MAP_ARRAY(adapt_kern_uid_map, __u32, __u64, 1)
  108. #ifdef LINUX_VER_5_2_PLUS
  109. /*
  110. * Fast matching cache, used to speed up protocol inference.
  111. * Due to the limitation of the number of eBPF instruction in kernel, this feature
  112. * is suitable for Linux5.2+
  113. * key : The high 16 bits of the process-ID/thread-ID
  114. * value : struct proto_infer_cache_t
  115. * The process-ID/thread-ID range [0, 5242880], if the process value exceeds the
  116. * maximum value range, fast cache matching becomes invalid.
  117. */
  118. MAP_ARRAY(proto_infer_cache_map, __u32, struct proto_infer_cache_t, PROTO_INFER_CACHE_SIZE)
  119. #endif
  120. static __inline bool is_protocol_enabled(int protocol)
  121. {
  122. int *enabled = protocol_filter__lookup(&protocol);
  123. return (enabled) ? (*enabled) : (0);
  124. }
  125. static __inline void delete_socket_info(__u64 conn_key,
  126. struct socket_info_t *socket_info_ptr)
  127. {
  128. if (socket_info_ptr == NULL)
  129. return;
  130. __u32 k0 = 0;
  131. struct trace_stats *trace_stats = trace_stats_map__lookup(&k0);
  132. if (trace_stats == NULL)
  133. return;
  134. if (!socket_info_map__delete(&conn_key)) {
  135. __sync_fetch_and_add(&trace_stats->
  136. socket_map_count, -1);
  137. }
  138. }
  139. static __u32 __inline get_tcp_write_seq_from_fd(int fd)
  140. {
  141. __u32 k0 = 0;
  142. struct member_fields_offset *offset = members_offset__lookup(&k0);
  143. if (!offset)
  144. return 0;
  145. void *sock = get_socket_from_fd(fd, offset);
  146. __u32 tcp_seq = 0;
  147. bpf_probe_read(&tcp_seq, sizeof(tcp_seq),
  148. sock + offset->tcp_sock__write_seq_offset);
  149. return tcp_seq;
  150. }
  151. static __u32 __inline get_tcp_read_seq_from_fd(int fd)
  152. {
  153. __u32 k0 = 0;
  154. struct member_fields_offset *offset = members_offset__lookup(&k0);
  155. if (!offset)
  156. return 0;
  157. void *sock = get_socket_from_fd(fd, offset);
  158. if (sock == NULL)
  159. return 0;
  160. __u32 tcp_seq = 0;
  161. bpf_probe_read(&tcp_seq, sizeof(tcp_seq),
  162. sock + offset->tcp_sock__copied_seq_offset);
  163. return tcp_seq;
  164. }
  165. /*
  166. * B : buffer
  167. * O : buffer offset, e.g.: infer_buf->len
  168. * I : &args->iov[i]
  169. * L_T : total_size
  170. * L_C : bytes_copy
  171. * F : first_iov
  172. * F_S : first_iov_size
  173. */
  174. #define COPY_IOV(B, O, I, L_T, L_C, F, F_S) do { \
  175. struct iovec iov_cpy; \
  176. bpf_probe_read(&iov_cpy, sizeof(struct iovec), (I)); \
  177. if (iov_cpy.iov_base == NULL || iov_cpy.iov_len == 0) continue; \
  178. if (!(F)) { \
  179. F = iov_cpy.iov_base; \
  180. F_S = iov_cpy.iov_len; \
  181. } \
  182. const int bytes_remaining = (L_T) - (L_C); \
  183. __u32 iov_size = \
  184. iov_cpy.iov_len < \
  185. bytes_remaining ? iov_cpy.iov_len : bytes_remaining; \
  186. __u32 len = (O) + (L_C); \
  187. struct copy_data_s *cp = (struct copy_data_s *)((B) + len); \
  188. if (len > (sizeof((B)) - sizeof(*cp))) \
  189. break; \
  190. if (iov_size >= sizeof(cp->data)) { \
  191. bpf_probe_read(cp->data, sizeof(cp->data), iov_cpy.iov_base); \
  192. iov_size = sizeof(cp->data); \
  193. } else { \
  194. iov_size = iov_size & (sizeof(cp->data) - 1); \
  195. bpf_probe_read(cp->data, iov_size + 1, iov_cpy.iov_base); \
  196. } \
  197. L_C = (L_C) + iov_size; \
  198. } while (0)
  199. static __inline int iovecs_copy(struct __socket_data *v,
  200. struct __socket_data_buffer *v_buff,
  201. const struct data_args_t* args,
  202. size_t syscall_len,
  203. __u32 send_len)
  204. {
  205. #define LOOP_LIMIT 12
  206. struct copy_data_s {
  207. char data[CAP_DATA_SIZE];
  208. };
  209. int bytes_copy = 0;
  210. __u32 total_size = 0;
  211. if (syscall_len >= sizeof(v->data))
  212. total_size = sizeof(v->data);
  213. else
  214. total_size = send_len;
  215. if (total_size > syscall_len)
  216. total_size = syscall_len;
  217. char *first_iov = NULL;
  218. __u32 first_iov_size = 0;
  219. #pragma unroll
  220. for (unsigned int i = 0;
  221. i < LOOP_LIMIT && i < args->iovlen && bytes_copy < total_size;
  222. ++i) {
  223. COPY_IOV(v_buff->data,
  224. v_buff->len + offsetof(typeof(struct __socket_data),
  225. data), &args->iov[i],
  226. total_size, bytes_copy, first_iov, first_iov_size);
  227. }
  228. return bytes_copy;
  229. }
  230. static __inline int infer_iovecs_copy(struct infer_data_s *infer_buf,
  231. const struct data_args_t *args,
  232. size_t syscall_len,
  233. __u32 copy_len,
  234. char **f_iov,
  235. __u32 *f_iov_len)
  236. {
  237. #define INFER_COPY_SZ 32
  238. #define INFER_LOOP_LIMIT 4
  239. struct copy_data_s {
  240. char data[INFER_COPY_SZ];
  241. };
  242. int bytes_copy = 0;
  243. __u32 total_size = 0;
  244. infer_buf->len = 0;
  245. if (syscall_len >= sizeof(infer_buf->data))
  246. total_size = sizeof(infer_buf->data);
  247. else
  248. total_size = copy_len;
  249. if (total_size > syscall_len)
  250. total_size = syscall_len;
  251. char *first_iov = NULL;
  252. __u32 first_iov_size = 0;
  253. #pragma unroll
  254. for (unsigned int i = 0;
  255. i < INFER_LOOP_LIMIT && i < args->iovlen && bytes_copy < total_size;
  256. i++) {
  257. COPY_IOV(infer_buf->data, infer_buf->len, &args->iov[i],
  258. total_size, bytes_copy, first_iov, first_iov_size);
  259. }
  260. *f_iov = first_iov;
  261. *f_iov_len = first_iov_size;
  262. return bytes_copy;
  263. }
  264. #include "uprobe_base_bpf.c"
  265. #include "include/protocol_inference.h"
  266. #define EVENT_BURST_NUM 1
  267. #define CONN_PERSIST_TIME_MAX_NS 100000000000ULL
  268. static __inline struct trace_key_t get_trace_key(__u64 timeout, bool is_socket_io)
  269. {
  270. __u64 pid_tgid = bpf_get_current_pid_tgid();
  271. __u64 goid = 0;
  272. if (timeout){
  273. goid = get_rw_goid(timeout * NS_PER_SEC, is_socket_io);
  274. }
  275. struct trace_key_t key = {};
  276. key.tgid = (__u32)(pid_tgid >> 32);
  277. if (goid) {
  278. key.goid = goid;
  279. } else {
  280. key.pid = (__u32)pid_tgid;
  281. }
  282. // debug("key.tgid:%llu|%llu",key.tgid,key.goid);
  283. // debug("key.pid:%d",(__u32)pid_tgid);
  284. // debug("key.goid:%llu",key.goid);
  285. return key;
  286. }
  287. static __inline unsigned int __retry_get_sock_flags(void *sk,
  288. int offset)
  289. {
  290. unsigned int flags = 0;
  291. bpf_probe_read(&flags, sizeof(flags), (void *)sk + offset);
  292. return flags;
  293. }
  294. static __inline void infer_sock_flags(void *sk,
  295. struct member_fields_offset *offset)
  296. {
  297. struct sock_flags_t {
  298. unsigned int sk_padding : 1;
  299. unsigned int sk_kern_sock : 1;
  300. unsigned int sk_no_check_tx : 1;
  301. unsigned int sk_no_check_rx : 1;
  302. unsigned int sk_userlocks : 4;
  303. unsigned int sk_protocol : 8;
  304. unsigned int sk_type : 16;
  305. };
  306. // Member '__sk_flags_offset' the offset in struct sock
  307. // 0x220 for 4.19.90-23.15.v2101.ky10.x86_64
  308. // 0x238 for 5.10.0-60.18.0.50.h322_1.hce2.aarch64
  309. #ifdef LINUX_VER_KYLIN
  310. int sock_flags_offset_array[] = {0x1f0, 0x1f8, 0x200, 0x208, 0x210, 0x218, 0x220};
  311. #elif defined LINUX_VER_5_2_PLUS
  312. // 0x230 for OEL7.9 Linux 5.4.17
  313. int sock_flags_offset_array[] = {0x1f0, 0x1f8, 0x200, 0x208, 0x210, 0x218, 0x230, 0x238};
  314. #else
  315. int sock_flags_offset_array[] = {0x1f0, 0x1f8, 0x200, 0x208, 0x210, 0x218};
  316. #endif
  317. unsigned int flags = 0;
  318. struct sock_flags_t *sk_flags = (struct sock_flags_t *)&flags;
  319. int i;
  320. #pragma unroll
  321. for (i = 0; i < ARRAY_SIZE(sock_flags_offset_array); i++) {
  322. flags = __retry_get_sock_flags(sk, sock_flags_offset_array[i]);
  323. /*
  324. * struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
  325. * struct proto *prot, int kern)
  326. *
  327. * -》sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);
  328. * 在申请sock时,使用了__GFP_ZERO,为了尽量确保准确性增加一个sk_padding为0判断。
  329. */
  330. if ((sk_flags->sk_type == SOCK_DGRAM
  331. || sk_flags->sk_type == SOCK_STREAM)
  332. && sk_flags->sk_kern_sock == 0
  333. && sk_flags->sk_padding == 0) {
  334. offset->sock__flags_offset = sock_flags_offset_array[i];
  335. break;
  336. }
  337. }
  338. }
  339. static __inline void get_sock_flags(void *sk,
  340. struct member_fields_offset *offset,
  341. struct conn_info_t *conn_info)
  342. {
  343. struct sock_flags_t {
  344. unsigned int sk_padding : 1;
  345. unsigned int sk_kern_sock : 1;
  346. unsigned int sk_no_check_tx : 1;
  347. unsigned int sk_no_check_rx : 1;
  348. unsigned int sk_userlocks : 4;
  349. unsigned int sk_protocol : 8;
  350. unsigned int sk_type : 16;
  351. };
  352. unsigned int flags = 0;
  353. struct sock_flags_t *sk_flags = (struct sock_flags_t *)&flags;
  354. bpf_probe_read(&flags, sizeof(flags), (void *)sk +
  355. offset->sock__flags_offset);
  356. conn_info->sk_type = sk_flags->sk_type;
  357. }
  358. /*
  359. * IPv4 connections can be handled with the v6 API by using the
  360. * v4-mapped-on-v6 address type;thus a program needs to
  361. * support only this API type to support both protocols.This
  362. * is handled transparently by the address handling functions
  363. * in the C library . IPv4 and IPv6 share the local port space.
  364. * When you get an IPv4 connection or packet to a IPv6 socket,
  365. * its source address will be mapped to v6 and it will be mapped
  366. * to v6. The address notation for IPv6 is a group of 8 4 -digit
  367. * hexadecimal numbers, separated with a ':'."::" stands for a
  368. * string of 0 bits.
  369. * Special addresses are
  370. * ::1 for loopback and ::FFFF:<IPv4 address> for IPv4-mapped-on-IPv6.
  371. */
  372. /*
  373. * Confirm whether you want to obtain IP through socket IPv4 address
  374. *
  375. * @s sock address
  376. * @f skc_family
  377. */
  378. #define ipv4_mapped_on_ipv6_confirm(s, f, o) \
  379. do { \
  380. char __addr[16]; \
  381. bpf_probe_read(__addr, 16, \
  382. (s) + o->struct_sock_ip6saddr_offset); \
  383. __u32 __feature = *(__u32 *)&__addr[8]; \
  384. if (__feature == 0xffff0000) \
  385. f = PF_INET; \
  386. } while(0)
  387. static __inline int is_tcp_udp_data(void *sk,
  388. struct member_fields_offset *offset,
  389. struct conn_info_t *conn_info)
  390. {
  391. struct skc_flags_t {
  392. unsigned char skc_reuse : 4;
  393. unsigned char skc_reuseport : 1;
  394. unsigned char skc_ipv6only : 1;
  395. unsigned char skc_net_refcnt : 1;
  396. };
  397. struct skc_flags_t skc_flags;
  398. bpf_probe_read(&skc_flags, sizeof(skc_flags),
  399. sk + offset->struct_sock_common_ipv6only_offset);
  400. conn_info->skc_ipv6only = skc_flags.skc_ipv6only;
  401. bpf_probe_read(&conn_info->skc_family, sizeof(conn_info->skc_family),
  402. sk + offset->struct_sock_family_offset);
  403. /*
  404. * Without thinking about PF_UNIX.
  405. */
  406. switch (conn_info->skc_family) {
  407. case PF_INET:
  408. break;
  409. case PF_INET6:
  410. if (conn_info->skc_ipv6only == 0) {
  411. ipv4_mapped_on_ipv6_confirm(sk, conn_info->skc_family, offset);
  412. }
  413. break;
  414. default:
  415. return SOCK_CHECK_TYPE_ERROR;
  416. }
  417. get_sock_flags(sk, offset, conn_info);
  418. if (conn_info->sk_type == SOCK_DGRAM) {
  419. conn_info->tuple.l4_protocol = IPPROTO_UDP;
  420. return SOCK_CHECK_TYPE_UDP;
  421. }
  422. if (conn_info->sk_type != SOCK_STREAM) {
  423. return SOCK_CHECK_TYPE_ERROR;
  424. }
  425. unsigned char skc_state;
  426. bpf_probe_read(&skc_state, sizeof(skc_state),
  427. (void *)sk + offset->struct_sock_skc_state_offset);
  428. /* 如果连接尚未建立好,不处于ESTABLISHED或者CLOSE_WAIT状态,退出 */
  429. if ((1 << skc_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) {
  430. return SOCK_CHECK_TYPE_ERROR;
  431. }
  432. conn_info->tuple.l4_protocol = IPPROTO_TCP;
  433. return SOCK_CHECK_TYPE_TCP_ES;
  434. }
  435. static __inline void init_conn_info(__u32 tgid, __u32 fd,
  436. struct conn_info_t *conn_info, void *sk,
  437. struct member_fields_offset *offset)
  438. {
  439. __be16 inet_dport;
  440. __u16 inet_sport;
  441. bpf_probe_read(&inet_dport, sizeof(inet_dport), sk + offset->struct_sock_dport_offset);
  442. bpf_probe_read(&inet_sport, sizeof(inet_sport), sk + offset->struct_sock_sport_offset);
  443. conn_info->tuple.dport = __bpf_ntohs(inet_dport);
  444. conn_info->tuple.num = inet_sport;
  445. conn_info->correlation_id = -1; // 当前用于kafka协议推断
  446. conn_info->fd = fd;
  447. conn_info->sk = sk;
  448. __u64 conn_key = gen_conn_key_id((__u64)tgid, (__u64)conn_info->fd);
  449. conn_info->socket_info_ptr =
  450. socket_info_map__lookup(&conn_key);
  451. }
  452. static __inline bool get_socket_info(struct __socket_data *v, void *sk,
  453. struct conn_info_t *conn_info)
  454. {
  455. if (v == NULL || sk == NULL)
  456. return false;
  457. unsigned int k0 = 0;
  458. struct member_fields_offset *offset = members_offset__lookup(&k0);
  459. if (!offset)
  460. return false;
  461. /*
  462. * Without thinking about PF_UNIX.
  463. */
  464. switch (conn_info->skc_family) {
  465. case PF_INET:
  466. bpf_probe_read(v->tuple.rcv_saddr, 4,
  467. sk + offset->struct_sock_saddr_offset);
  468. bpf_probe_read(v->tuple.daddr, 4,
  469. sk + offset->struct_sock_daddr_offset);
  470. v->tuple.addr_len = 4;
  471. break;
  472. case PF_INET6:
  473. if (sk + offset->struct_sock_ip6saddr_offset >= 0) {
  474. bpf_probe_read(
  475. v->tuple.rcv_saddr, 16,
  476. sk + offset->struct_sock_ip6saddr_offset);
  477. }
  478. if (sk + offset->struct_sock_ip6daddr_offset >= 0) {
  479. bpf_probe_read(
  480. v->tuple.daddr, 16,
  481. sk + offset->struct_sock_ip6daddr_offset);
  482. }
  483. v->tuple.addr_len = 16;
  484. break;
  485. default:
  486. return false;
  487. }
  488. return true;
  489. }
  490. #ifdef PROBE_CONN_SUBMIT
  491. static __inline void connect_submit(struct pt_regs *ctx, struct conn_info_t *v, int act)
  492. {
  493. switch (act) {
  494. case CONN_ADD:
  495. v->type = SOCK_ADD_EVENT;
  496. break;
  497. case CONN_DEL:
  498. v->type = SOCK_INFO_EVENT;
  499. break;
  500. default:
  501. return;
  502. }
  503. int ret = bpf_perf_event_output(ctx, &NAME(socket_data),
  504. BPF_F_CURRENT_CPU, v,
  505. 128);
  506. if (ret) bpf_debug("connect_submit: %d\n", ret);
  507. }
  508. #endif
  509. static __inline void
  510. infer_l7_class(struct ctx_info_s *ctx,
  511. struct conn_info_t* conn_info,
  512. enum traffic_direction direction,
  513. const struct data_args_t *args,
  514. size_t count, __u8 sk_type,
  515. const struct process_data_extra *extra)
  516. {
  517. if (conn_info == NULL) {
  518. return;
  519. }
  520. // 推断应用协议
  521. struct protocol_message_t inferred_protocol =
  522. infer_protocol(ctx, args, count, conn_info, sk_type, extra);
  523. if (inferred_protocol.protocol == PROTO_UNKNOWN &&
  524. inferred_protocol.type == MSG_UNKNOWN) {
  525. conn_info->protocol = PROTO_UNKNOWN;
  526. return;
  527. }
  528. conn_info->protocol = inferred_protocol.protocol;
  529. conn_info->message_type = inferred_protocol.type;
  530. }
  531. static __inline __u32 retry_get_write_seq(void *sk,
  532. int offset,
  533. int snd_nxt_offset)
  534. {
  535. /*
  536. * 判断依据
  537. *
  538. * write_seq == snd_nxt && snd_nxt != 0 && write_seq != 0
  539. */
  540. __u32 snd_nxt, write_seq;
  541. bpf_probe_read(&write_seq, sizeof(write_seq), (void *)sk + offset);
  542. bpf_probe_read(&snd_nxt, sizeof(snd_nxt), (void *)sk + snd_nxt_offset);
  543. if (snd_nxt == write_seq && snd_nxt != 0 && write_seq != 0) {
  544. return write_seq;
  545. } else
  546. return 0;
  547. return 0;
  548. }
  549. static __inline __u32 retry_get_copied_seq(void *sk,
  550. int offset)
  551. {
  552. /*
  553. * 判断依据
  554. * copied_seq + 1 == rcv_wup
  555. * tcp_header_len 在[20, 60]区间
  556. * rcv_wup == rcv_nxt
  557. * rcv_wup != 0 && rcv_nxt != 0 && copied_seq != 0
  558. *
  559. * struct tcp_sock {
  560. * ...
  561. * u16 tcp_header_len; -28
  562. * ...
  563. * u64 bytes_received; -20
  564. * ...
  565. * u32 rcv_nxt; -4
  566. * u32 copied_seq; 0
  567. * u32 rcv_wup; +4
  568. * u32 snd_nxt; +8
  569. * ...
  570. * }
  571. */
  572. __u32 rcv_nxt, rcv_wup, copied_seq;
  573. __u16 tcp_header_len;
  574. bpf_probe_read(&copied_seq, sizeof(copied_seq), (void *)sk + offset);
  575. bpf_probe_read(&rcv_nxt, sizeof(rcv_nxt), (void *)sk + offset - 4);
  576. bpf_probe_read(&rcv_wup, sizeof(rcv_wup), (void *)sk + offset + 4);
  577. bpf_probe_read(&tcp_header_len, sizeof(tcp_header_len), (void *)sk + offset - 28);
  578. if (!(tcp_header_len >= 20 && tcp_header_len <= 60 && copied_seq != 0))
  579. return 0;
  580. if ((copied_seq == rcv_nxt && rcv_wup == rcv_nxt)) {
  581. return copied_seq;
  582. }
  583. return 0;
  584. }
  585. static __inline void infer_tcp_seq_offset(void *sk,
  586. struct member_fields_offset *offset)
  587. {
  588. // 成员 copied_seq 在 struct tcp_sock 中的偏移量
  589. // 0x644 for EulerOS 4.18.0-147
  590. // 0x65c for 4.19.90-23.15.v2101.ky10.x86_64
  591. // 0x654 for 5.10.0-60.18.0.50.h322_1.hce2.aarch64
  592. #ifdef LINUX_VER_KYLIN
  593. int copied_seq_offsets[] = {0x514, 0x524, 0x52c, 0x534, 0x53c,
  594. 0x544, 0x54c, 0x554, 0x55c, 0x564,
  595. 0x56c, 0x574, 0x57c, 0x584, 0x58c,
  596. 0x594, 0x59c, 0x5dc, 0x644, 0x65c};
  597. #elif defined LINUX_VER_5_2_PLUS
  598. // 0x63c for OEL7.9 Linux 5.4.17
  599. int copied_seq_offsets[] = {0x514, 0x51c, 0x524, 0x52c, 0x534,
  600. 0x53c, 0x544, 0x54c, 0x554, 0x55c,
  601. 0x564, 0x56c, 0x574, 0x57c, 0x584,
  602. 0x58c, 0x594, 0x59c, 0x5dc, 0x644,
  603. 0x654, 0x63c};
  604. #else
  605. int copied_seq_offsets[] = {0x514, 0x51c, 0x524, 0x52c, 0x534,
  606. 0x53c, 0x544, 0x54c, 0x554, 0x55c,
  607. 0x564, 0x56c, 0x574, 0x57c, 0x584,
  608. 0x58c, 0x594, 0x59c, 0x5dc, 0x644,
  609. 0x664};
  610. #endif
  611. // 成员 write_seq 在 struct tcp_sock 中的偏移量
  612. // 0x7b4 for EulerOS 4.18.0-147
  613. // 0x7cc for 4.19.90-23.15.v2101.ky10.x86_64
  614. // The 0x684 feature code interferes with the inference of write_seq in the Kylin system. It must be removed.
  615. // 0x7d4 for 5.10.0-60.18.0.50.h322_1.hce2.aarch64
  616. #ifdef LINUX_VER_KYLIN
  617. int write_seq_offsets[] = {0x66c, 0x674, 0x68c, 0x694, 0x69c, 0x6a4,
  618. 0x6ac, 0x6b4, 0x6bc, 0x6c4, 0x6cc, 0x6d4,
  619. 0x6dc, 0x6ec, 0x6f4, 0x6fc, 0x704, 0x70c,
  620. 0x714, 0x71c, 0x74c, 0x7b4, 0x7cc};
  621. #elif defined LINUX_VER_5_2_PLUS
  622. // 0x7bc for OEL7.9 Linux 5.4.17
  623. int write_seq_offsets[] = {0x66c, 0x674, 0x67c, 0x684, 0x68c, 0x694,
  624. 0x69c, 0x6a4, 0x6ac, 0x6b4, 0x6bc, 0x6c4,
  625. 0x6cc, 0x6d4, 0x6dc, 0x6e4, 0x6ec, 0x6f4,
  626. 0x6fc, 0x704, 0x70c, 0x714, 0x71c, 0x74c,
  627. 0x7b4, 0x7d4, 0x7bc};
  628. #else
  629. int write_seq_offsets[] = {0x66c, 0x674, 0x67c, 0x684, 0x68c, 0x694,
  630. 0x69c, 0x6a4, 0x6ac, 0x6b4, 0x6bc, 0x6c4,
  631. 0x6cc, 0x6d4, 0x6dc, 0x6e4, 0x6ec, 0x6f4,
  632. 0x6fc, 0x704, 0x70c, 0x714, 0x71c, 0x74c,
  633. 0x7b4, 0x7d4};
  634. #endif
  635. int i, snd_nxt_offset = 0;
  636. if (!offset->tcp_sock__copied_seq_offset) {
  637. #pragma unroll
  638. for (i = 0; i < ARRAY_SIZE(copied_seq_offsets); i++) {
  639. if (retry_get_copied_seq(sk, copied_seq_offsets[i])) {
  640. offset->tcp_sock__copied_seq_offset = copied_seq_offsets[i];
  641. break;
  642. }
  643. }
  644. }
  645. /*
  646. * snd_nxt_offset 用于write_seq offset的判断。
  647. *
  648. * u32 copied_seq; 0
  649. * u32 rcv_wup; +4
  650. * u32 snd_nxt; +8
  651. */
  652. snd_nxt_offset = offset->tcp_sock__copied_seq_offset + 8;
  653. if (snd_nxt_offset == 8)
  654. return;
  655. if (!offset->tcp_sock__write_seq_offset) {
  656. #pragma unroll
  657. for (i = 0; i < ARRAY_SIZE(write_seq_offsets); i++) {
  658. if (retry_get_write_seq(sk, write_seq_offsets[i], snd_nxt_offset)) {
  659. offset->tcp_sock__write_seq_offset = write_seq_offsets[i];
  660. break;
  661. }
  662. }
  663. }
  664. }
  665. static __inline int infer_offset_retry(int fd)
  666. {
  667. __u32 k0 = 0;
  668. struct member_fields_offset *offset = members_offset__lookup(&k0);
  669. if (!offset)
  670. return OFFSET_NO_READY;
  671. if (unlikely(!offset->ready)) {
  672. __u64 *adapt_uid = adapt_kern_uid_map__lookup(&k0);
  673. if (!adapt_uid)
  674. return OFFSET_NO_READY;
  675. debug("adapt_uid:%llu",adapt_uid);
  676. // Only a preset uid can be adapted to the kernel
  677. if (*adapt_uid != bpf_get_current_pid_tgid())
  678. return OFFSET_NO_READY;
  679. void *infer_sk =
  680. infer_and_get_socket_from_fd(fd, offset, false);
  681. if (infer_sk) {
  682. if (unlikely(!offset->sock__flags_offset))
  683. infer_sock_flags(infer_sk, offset);
  684. if (unlikely(!offset->tcp_sock__copied_seq_offset ||
  685. !offset->tcp_sock__write_seq_offset)) {
  686. infer_tcp_seq_offset(infer_sk, offset);
  687. if (likely
  688. (offset->tcp_sock__copied_seq_offset
  689. && offset->tcp_sock__write_seq_offset
  690. && offset->sock__flags_offset
  691. && offset->task__files_offset))
  692. offset->ready = 1;
  693. }
  694. }
  695. }
  696. if (!offset->ready)
  697. return OFFSET_NO_READY;
  698. return OFFSET_READY;
  699. }
  700. #define CHECK_OFFSET_READY(f) \
  701. do { \
  702. if (infer_offset_retry((f)) == OFFSET_NO_READY) \
  703. return 0; \
  704. } while(0)
  705. #define TRACE_MAP_ACT_NONE 0
  706. #define TRACE_MAP_ACT_NEW 1
  707. #define TRACE_MAP_ACT_DEL 2
  708. static __inline void trace_process(struct socket_info_t *socket_info_ptr,
  709. struct conn_info_t* conn_info,
  710. __u64 socket_id, __u64 pid_tgid,
  711. struct trace_info_t *trace_info_ptr,
  712. struct trace_conf_t *trace_conf,
  713. struct trace_stats *trace_stats,
  714. __u64 *thread_trace_id,
  715. __u64 time_stamp,
  716. struct trace_key_t *trace_key) {
  717. /*
  718. * ==========================================
  719. * Thread-Trace-ID (Single Redirect Trace)
  720. * ==========================================
  721. *
  722. * Ingress | | Egress
  723. * ----------------------------------------------------------
  724. * socket-a |
  725. * trace start ID ① -> | |
  726. * | socket-b
  727. * - same thread ID --- |
  728. * | ① -> trace end
  729. * |
  730. * |
  731. * ... ...
  732. * socket-n
  733. * trace start ID ② -> | |
  734. * | socket-m
  735. * - same thread ID --- |
  736. * | ② -> trace end
  737. */
  738. /*
  739. * 同方向多个连续请求或回应的场景:
  740. *
  741. * Ingress |
  742. * ----------------------
  743. * socket-n
  744. * ① -> |
  745. * ② -> |
  746. * ③ -> |
  747. * ......
  748. *
  749. *
  750. * | Egress
  751. * -----------------------------
  752. * socket-m
  753. * | -> ①
  754. * ......
  755. * 采用的策略是:沿用上次trace_info保存的traceID。
  756. */
  757. /*
  758. * Socket A actively sends a request as a client (traceID is 0),
  759. * and associates socket B with the thread ID. Socket B receives a
  760. * response as the client, create new traceID as the starting point
  761. * for the entire tracking process. There is a problem in tracking
  762. * down like this, and a closed loop cannot be formed. This is due to
  763. * receiving a response from socket B to start the trace, but not being
  764. * able to get a request from socket B to finish the entire trace.
  765. *
  766. * (socket A) -- request ->
  767. * |
  768. * (socket B) <- response (traceID-1) [The starting point of trace]
  769. * |
  770. * (socket C) -- request -> (traceID-1)
  771. * |
  772. * (socket D) <- response (traceID-2)
  773. * |
  774. * (socket E) -- request -> (traceID-2)
  775. * ... ... (Can't finish the whole trace)
  776. *
  777. * In order to avoid invalid association of the client, the behavior of creating
  778. * a new trace on socket B is cancelled.
  779. *
  780. * (socket A) ------- request -------->
  781. * |
  782. * thread-ID
  783. * |
  784. * (socket B) <---- response (Here, not create new trace.)
  785. */
  786. __u64 pre_trace_id = 0;
  787. int ret;
  788. if (is_socket_info_valid(socket_info_ptr) &&
  789. conn_info->direction == socket_info_ptr->direction &&
  790. conn_info->message_type == socket_info_ptr->msg_type) {
  791. if (trace_info_ptr)
  792. pre_trace_id = trace_info_ptr->thread_trace_id;
  793. conn_info->keep_data_seq = true; // 同时这里确保捕获数据的序列号保持不变。
  794. }
  795. if (conn_info->direction == T_INGRESS) {
  796. if (trace_info_ptr) {
  797. /*
  798. * The following scenarios do not track:
  799. * ---------------------------------------
  800. * [traceID : 0]
  801. * (client-socket) request ->
  802. * |
  803. * thread-ID
  804. * |
  805. * (client-socket) <- response
  806. */
  807. if (trace_info_ptr->is_trace_id_zero &&
  808. conn_info->message_type == MSG_RESPONSE &&
  809. conn_info->infer_reliable) {
  810. if (!trace_map__delete(trace_key)) {
  811. __sync_fetch_and_add(&trace_stats->
  812. trace_map_count,
  813. -1);
  814. }
  815. return;
  816. }
  817. }
  818. struct trace_info_t trace_info = { 0 };
  819. *thread_trace_id = trace_info.thread_trace_id =
  820. (pre_trace_id ==
  821. 0 ? ++trace_conf->thread_trace_id : pre_trace_id);
  822. if (conn_info->message_type == MSG_REQUEST)
  823. trace_info.peer_fd = conn_info->fd;
  824. else if (conn_info->message_type == MSG_RESPONSE) {
  825. if (is_socket_info_valid(socket_info_ptr) &&
  826. socket_info_ptr->peer_fd != 0)
  827. trace_info.peer_fd = socket_info_ptr->peer_fd;
  828. }
  829. trace_info.update_time = time_stamp / NS_PER_SEC;
  830. trace_info.socket_id = socket_id;
  831. ret = trace_map__update(trace_key, &trace_info);
  832. if (!trace_info_ptr) {
  833. if (ret == 0) {
  834. __sync_fetch_and_add(&trace_stats->
  835. trace_map_count, 1);
  836. }
  837. }
  838. } else { /* direction == T_EGRESS */
  839. if (trace_info_ptr) {
  840. /*
  841. * Skip the scene below:
  842. * ------------------------------------------------
  843. * (client-socket) request [traceID : 0] ->
  844. * |
  845. * thread-ID
  846. * |
  847. * (client-socket) request [traceID : 0] ->
  848. */
  849. if (trace_info_ptr->is_trace_id_zero) {
  850. return;
  851. }
  852. /*
  853. * 追踪在不同socket之间进行,而对于在同一个socket的情况进行忽略。
  854. */
  855. if (socket_id != trace_info_ptr->socket_id) {
  856. *thread_trace_id =
  857. trace_info_ptr->thread_trace_id;
  858. }
  859. if (!trace_map__delete(trace_key)) {
  860. __sync_fetch_and_add(&trace_stats->
  861. trace_map_count, -1);
  862. }
  863. } else {
  864. /*
  865. * Record the scene below:
  866. * ------------------------------------------------
  867. * (client-socket) request [traceID : 0] ->
  868. */
  869. if (conn_info->message_type == MSG_REQUEST
  870. && conn_info->infer_reliable) {
  871. struct trace_info_t trace_info = { 0 };
  872. trace_info.is_trace_id_zero = true;
  873. trace_info.update_time =
  874. time_stamp / NS_PER_SEC;
  875. trace_map__update(trace_key, &trace_info);
  876. __sync_fetch_and_add(&trace_stats->
  877. trace_map_count, 1);
  878. }
  879. }
  880. }
  881. }
  882. static __inline int
  883. __data_submit(struct pt_regs *ctx, struct conn_info_t *conn_info,
  884. const struct data_args_t *args, const bool vecs, __u32 syscall_len,
  885. struct member_fields_offset *offset, __u64 time_stamp,
  886. const struct process_data_extra *extra)
  887. {
  888. if (conn_info == NULL) {
  889. return SUBMIT_INVALID;
  890. }
  891. // ignore non-http protocols that are go tls
  892. if (extra->source == DATA_SOURCE_GO_TLS_UPROBE) {
  893. if (conn_info->protocol != PROTO_HTTP1)
  894. return SUBMIT_INVALID;
  895. }
  896. if (extra->source == DATA_SOURCE_OPENSSL_UPROBE) {
  897. if (conn_info->protocol != PROTO_HTTP1 &&
  898. conn_info->protocol != PROTO_HTTP2)
  899. return SUBMIT_INVALID;
  900. }
  901. if (conn_info->sk == NULL || conn_info->message_type == MSG_UNKNOWN) {
  902. return SUBMIT_INVALID;
  903. }
  904. __u64 pid_tgid = bpf_get_current_pid_tgid();
  905. __u32 tgid = (__u32) (pid_tgid >> 32);
  906. __u64 conn_key = gen_conn_key_id((__u64)tgid, (__u64)conn_info->fd);
  907. if (conn_info->message_type == MSG_CLEAR) {
  908. delete_socket_info(conn_key, conn_info->socket_info_ptr);
  909. return SUBMIT_INVALID;
  910. }
  911. __u32 tcp_seq = args->tcp_seq;
  912. __u64 thread_trace_id = 0;
  913. __u32 k0 = 0;
  914. struct socket_info_t sk_info = { 0 };
  915. struct trace_conf_t *trace_conf = trace_conf_map__lookup(&k0);
  916. if (trace_conf == NULL)
  917. return SUBMIT_INVALID;
  918. /*
  919. * It is possible that these values were modified during ebpf running,
  920. * so they are saved here.
  921. */
  922. int data_max_sz = trace_conf->data_limit_max;
  923. struct trace_stats *trace_stats = trace_stats_map__lookup(&k0);
  924. if (trace_stats == NULL)
  925. return SUBMIT_INVALID;
  926. // bpf_debug("[GO timeout]:%llu",trace_conf->socket_id);
  927. __u32 timeout = trace_conf->go_tracing_timeout;
  928. struct trace_key_t trace_key = get_trace_key(timeout, true);
  929. // debug("[GO]:%llu",trace_key.goid);
  930. // debug("[GO timeout]:%llu",timeout);
  931. struct trace_info_t *trace_info_ptr = trace_map__lookup(&trace_key);
  932. struct socket_info_t *socket_info_ptr = conn_info->socket_info_ptr;
  933. // 'socket_id' used to resolve non-tracing between the same socket
  934. __u64 socket_id = 0;
  935. if (!is_socket_info_valid(socket_info_ptr)) {
  936. // Not use "++trace_conf->socket_id" here,
  937. // because it did not pass the verification of linux 4.14.x, 4.15.x
  938. socket_id = trace_conf->socket_id + 1;
  939. } else {
  940. socket_id = socket_info_ptr->uid;
  941. }
  942. #define DNS_AAAA_TYPE_ID 0x1c
  943. // FIXME: By default, the Go process continuously sends A record and
  944. // AAAA record DNS request messages. In the current call chain tracking
  945. // implementation, two consecutive request messages before receiving
  946. // the response message will cause the link to be broken. Ignore the
  947. // AAAA record To ensure that the call chain will not be broken.
  948. if (conn_info->message_type != MSG_PRESTORE &&
  949. conn_info->message_type != MSG_RECONFIRM &&
  950. (timeout != 0 || extra->is_go_process == false) &&
  951. !(conn_info->protocol == PROTO_DNS &&
  952. conn_info->dns_q_type == DNS_AAAA_TYPE_ID))
  953. trace_process(socket_info_ptr, conn_info, socket_id, pid_tgid,
  954. trace_info_ptr, trace_conf, trace_stats,
  955. &thread_trace_id, time_stamp, &trace_key);
  956. if (!is_socket_info_valid(socket_info_ptr)) {
  957. if (socket_info_ptr && conn_info->direction == T_EGRESS) {
  958. sk_info.peer_fd = socket_info_ptr->peer_fd;
  959. thread_trace_id = socket_info_ptr->trace_id;
  960. }
  961. sk_info.uid = trace_conf->socket_id + 1;
  962. trace_conf->socket_id++; // Ensure that socket_id is incremented.
  963. sk_info.l7_proto = conn_info->protocol;
  964. sk_info.direction = conn_info->direction;
  965. sk_info.role = conn_info->role;
  966. sk_info.msg_type = conn_info->message_type;
  967. sk_info.update_time = time_stamp / NS_PER_SEC;
  968. sk_info.need_reconfirm = conn_info->need_reconfirm;
  969. sk_info.correlation_id = conn_info->correlation_id;
  970. /*
  971. * MSG_PRESTORE 目前只用于MySQL, Kafka协议推断
  972. */
  973. if (conn_info->message_type == MSG_PRESTORE) {
  974. *(__u32 *)sk_info.prev_data = *(__u32 *)conn_info->prev_buf;
  975. sk_info.prev_data_len = 4;
  976. sk_info.uid = 0;
  977. }
  978. int ret = socket_info_map__update(&conn_key, &sk_info);
  979. if (socket_info_ptr == NULL && ret == 0) {
  980. __sync_fetch_and_add(&trace_stats->
  981. socket_map_count, 1);
  982. }
  983. }
  984. /*
  985. * 对于预先存储数据或socket l7协议类型需要再次确认(适用于长链接)
  986. * 的动作只建立socket_info_map项不会发送数据给用户态程序。
  987. */
  988. if (conn_info->message_type == MSG_PRESTORE ||
  989. conn_info->message_type == MSG_RECONFIRM)
  990. return SUBMIT_INVALID;
  991. if (is_socket_info_valid(socket_info_ptr)) {
  992. sk_info.uid = socket_info_ptr->uid;
  993. /*
  994. * 同方向多个连续请求或回应的场景时,
  995. * 保持捕获数据的序列号保持不变。
  996. */
  997. if (!conn_info->keep_data_seq) {
  998. /*
  999. * Ensure that the accumulation operation of capturing the
  1000. * data sequence number is an atomic operation when multiple
  1001. * threads read/write to the socket simultaneously.
  1002. */
  1003. __sync_fetch_and_add(&socket_info_ptr->seq, 1);
  1004. }
  1005. sk_info.seq = socket_info_ptr->seq;
  1006. socket_info_ptr->direction = conn_info->direction;
  1007. socket_info_ptr->msg_type = conn_info->message_type;
  1008. socket_info_ptr->update_time = time_stamp / NS_PER_SEC;
  1009. if (socket_info_ptr->peer_fd != 0 && conn_info->direction == T_INGRESS) {
  1010. __u64 peer_conn_key = gen_conn_key_id((__u64)tgid,
  1011. (__u64)socket_info_ptr->peer_fd);
  1012. struct socket_info_t *peer_socket_info_ptr =
  1013. socket_info_map__lookup(&peer_conn_key);
  1014. if (is_socket_info_valid(peer_socket_info_ptr))
  1015. peer_socket_info_ptr->trace_id = thread_trace_id;
  1016. }
  1017. if (conn_info->direction == T_EGRESS && socket_info_ptr->trace_id != 0) {
  1018. thread_trace_id = socket_info_ptr->trace_id;
  1019. socket_info_ptr->trace_id = 0;
  1020. }
  1021. }
  1022. struct __socket_data_buffer *v_buff = bpf_map_lookup_elem(&NAME(data_buf), &k0);
  1023. if (!v_buff)
  1024. return SUBMIT_INVALID;
  1025. struct __socket_data *v = (struct __socket_data *)&v_buff->data[0];
  1026. if (v_buff->len > (sizeof(v_buff->data) - sizeof(*v)))
  1027. return SUBMIT_INVALID;
  1028. v = (struct __socket_data *)(v_buff->data + v_buff->len);
  1029. if (get_socket_info(v, conn_info->sk, conn_info) == false)
  1030. return SUBMIT_INVALID;
  1031. v->tuple.l4_protocol = conn_info->tuple.l4_protocol;
  1032. v->tuple.dport = conn_info->tuple.dport;
  1033. v->tuple.num = conn_info->tuple.num;
  1034. v->data_type = conn_info->protocol;
  1035. if (conn_info->protocol == PROTO_HTTP1 &&
  1036. (extra->source == DATA_SOURCE_GO_TLS_UPROBE ||
  1037. extra->source == DATA_SOURCE_OPENSSL_UPROBE))
  1038. v->data_type = PROTO_TLS_HTTP1;
  1039. if (conn_info->protocol == PROTO_HTTP2 &&
  1040. (extra->source == DATA_SOURCE_OPENSSL_UPROBE))
  1041. v->data_type = PROTO_TLS_HTTP2;
  1042. v->socket_id = sk_info.uid;
  1043. v->data_seq = sk_info.seq;
  1044. v->tgid = tgid;
  1045. v->pid = (__u32) pid_tgid;
  1046. // For blocking reads, there is a significant deviation between the
  1047. // entry time of the system call and the real time of the read
  1048. // operation. Therefore, the end time of the system call is used for
  1049. // the read operation.
  1050. v->timestamp = conn_info->direction == T_INGRESS ? bpf_ktime_get_ns() :
  1051. time_stamp;
  1052. v->direction = conn_info->direction;
  1053. v->syscall_len = syscall_len;
  1054. v->msg_type = conn_info->message_type;
  1055. v->tcp_seq = 0;
  1056. if ((extra->source == DATA_SOURCE_GO_TLS_UPROBE ||
  1057. extra->source == DATA_SOURCE_OPENSSL_UPROBE) ||
  1058. (conn_info->direction == T_INGRESS &&
  1059. conn_info->tuple.l4_protocol == IPPROTO_TCP)) {
  1060. /*
  1061. * If the current state is TCPF_CLOSE_WAIT, the FIN frame already has been received.
  1062. * However, it cannot be confirmed that it has been processed by the syscall,
  1063. * so use the tcp_seq value that entering the syscalls.
  1064. *
  1065. * Why not use "v->tcp_seq = args->tcp_seq;" ?
  1066. * This is because kernel 4.14 verify reports errors("R0 invalid mem access 'inv'").
  1067. */
  1068. v->tcp_seq = tcp_seq;
  1069. } else if (conn_info->direction == T_EGRESS &&
  1070. conn_info->tuple.l4_protocol == IPPROTO_TCP) {
  1071. v->tcp_seq = get_tcp_write_seq_from_fd(conn_info->fd) - syscall_len;
  1072. }
  1073. v->thread_trace_id = thread_trace_id;
  1074. bpf_get_current_comm(v->comm, sizeof(v->comm));
  1075. if (conn_info->tuple.l4_protocol == IPPROTO_TCP &&
  1076. conn_info->protocol == PROTO_DNS && conn_info->prev_count == 2) {
  1077. v->tcp_seq -= 2;
  1078. conn_info->prev_count = 0;
  1079. }
  1080. if (conn_info->prev_count == 4) {
  1081. // 注意这里没有调整v->syscall_len和v->len我们会在用户层做。
  1082. v->extra_data = *(__u32 *)conn_info->prev_buf;
  1083. v->extra_data_count = conn_info->prev_count;
  1084. v->tcp_seq -= conn_info->prev_count; // 客户端和服务端的tcp_seq匹配
  1085. } else
  1086. v->extra_data_count = 0;
  1087. v->coroutine_id = trace_key.goid;
  1088. v->source = extra->source;
  1089. #ifdef LINUX_VER_5_2_PLUS
  1090. __u32 cache_key = (__u32) pid_tgid >> 16;
  1091. if (cache_key < PROTO_INFER_CACHE_SIZE) {
  1092. struct proto_infer_cache_t *p;
  1093. p = proto_infer_cache_map__lookup(&cache_key);
  1094. if (p) {
  1095. __u16 idx = (__u16) pid_tgid;
  1096. p->protocols[idx] = (__u8) v->data_type;
  1097. }
  1098. }
  1099. #endif
  1100. struct tail_calls_context *context = (struct tail_calls_context *)v->data;
  1101. context->max_size_limit = data_max_sz;
  1102. context->vecs = (bool) vecs;
  1103. context->dir = conn_info->direction;
  1104. return SUBMIT_OK;
  1105. }
  1106. static __inline int process_data(struct pt_regs *ctx, __u64 id,
  1107. const enum traffic_direction direction,
  1108. const struct data_args_t *args,
  1109. ssize_t bytes_count,
  1110. const struct process_data_extra *extra)
  1111. {
  1112. if (!extra)
  1113. return -1;
  1114. if (!extra->vecs && args->buf == NULL)
  1115. return -1;
  1116. if (extra->vecs && (args->iov == NULL || args->iovlen <= 0))
  1117. return -1;
  1118. if (unlikely(args->fd < 0 || (int)bytes_count <= 0))
  1119. return -1;
  1120. // TODO : 此处可以根据配置对进程号进行过滤
  1121. __u32 pid = id >> 32;
  1122. if (load_filter_pid() != 0 && pid != load_filter_pid()) {
  1123. return -1;
  1124. }
  1125. __u32 k0 = 0;
  1126. struct member_fields_offset *offset = members_offset__lookup(&k0);
  1127. if (!offset)
  1128. return -1;
  1129. if (unlikely(!offset->ready))
  1130. return -1;
  1131. void *sk = get_socket_from_fd(args->fd, offset);
  1132. struct conn_info_t *conn_info, __conn_info = { 0 };
  1133. conn_info = &__conn_info;
  1134. __u8 sock_state;
  1135. if (!(sk != NULL &&
  1136. ((sock_state = is_tcp_udp_data(sk, offset, conn_info))
  1137. != SOCK_CHECK_TYPE_ERROR))) {
  1138. return -1;
  1139. }
  1140. init_conn_info(id >> 32, args->fd, &__conn_info, sk, offset);
  1141. conn_info->direction = direction;
  1142. struct ctx_info_s *ctx_map = bpf_map_lookup_elem(&NAME(ctx_info), &k0);
  1143. if (!ctx_map)
  1144. return -1;
  1145. bool data_submit_dircet = false;
  1146. struct allow_port_bitmap *bp = allow_port_bitmap__lookup(&k0);
  1147. if (bp) {
  1148. if (is_set_bitmap(bp->bitmap, conn_info->tuple.dport) ||
  1149. is_set_bitmap(bp->bitmap, conn_info->tuple.num)) {
  1150. debug("data_submit_dircet = true");
  1151. data_submit_dircet = true;
  1152. }
  1153. }
  1154. if (data_submit_dircet) {
  1155. conn_info->protocol = PROTO_ORTHER;
  1156. conn_info->message_type = MSG_REQUEST;
  1157. } else {
  1158. infer_l7_class(ctx_map, conn_info, direction, args,
  1159. bytes_count, sock_state, extra);
  1160. }
  1161. // When at least one of protocol or message_type is valid,
  1162. // data_submit can be performed, otherwise MySQL data may be lost
  1163. if (conn_info->protocol != PROTO_UNKNOWN ||
  1164. conn_info->message_type != MSG_UNKNOWN) {
  1165. /*
  1166. * Fill in tail call context information.
  1167. */
  1168. ctx_map->tail_call.conn_info = __conn_info;
  1169. ctx_map->tail_call.extra = *extra;
  1170. ctx_map->tail_call.bytes_count = bytes_count;
  1171. ctx_map->tail_call.offset = offset;
  1172. return 0;
  1173. }
  1174. return -1;
  1175. }
  1176. static __inline void process_syscall_data(struct pt_regs* ctx, __u64 id,
  1177. const enum traffic_direction direction,
  1178. const struct data_args_t* args, ssize_t bytes_count) {
  1179. struct process_data_extra extra = {
  1180. .vecs = false,
  1181. .source = DATA_SOURCE_SYSCALL,
  1182. .is_go_process = is_current_go_process(),
  1183. };
  1184. if (!process_data(ctx, id, direction, args, bytes_count, &extra)) {
  1185. bpf_tail_call(ctx, &NAME(progs_jmp_tp_map),
  1186. PROG_DATA_SUBMIT_TP_IDX);
  1187. } else {
  1188. bpf_tail_call(ctx, &NAME(progs_jmp_tp_map),
  1189. PROG_IO_EVENT_TP_IDX);
  1190. }
  1191. }
  1192. static __inline void process_syscall_data_vecs(struct pt_regs* ctx, __u64 id,
  1193. const enum traffic_direction direction,
  1194. const struct data_args_t* args,
  1195. ssize_t bytes_count) {
  1196. struct process_data_extra extra = {
  1197. .vecs = true,
  1198. .source = DATA_SOURCE_SYSCALL,
  1199. .is_go_process = is_current_go_process(),
  1200. };
  1201. if (!process_data(ctx, id, direction, args, bytes_count, &extra)) {
  1202. bpf_tail_call(ctx, &NAME(progs_jmp_tp_map),
  1203. PROG_DATA_SUBMIT_TP_IDX);
  1204. } else {
  1205. bpf_tail_call(ctx, &NAME(progs_jmp_tp_map),
  1206. PROG_IO_EVENT_TP_IDX);
  1207. }
  1208. }
  1209. /***********************************************************
  1210. * BPF syscall probe/tracepoint function entry-points
  1211. ***********************************************************/
  1212. //TPPROG(sys_enter_write) (struct syscall_comm_enter_ctx *ctx) {
  1213. // __u64 id = bpf_get_current_pid_tgid();
  1214. // int fd = (int)ctx->fd;
  1215. // char *buf = (char *)ctx->buf;
  1216. //
  1217. // struct data_args_t write_args = {};
  1218. // write_args.source_fn = SYSCALL_FUNC_WRITE;
  1219. // write_args.fd = fd;
  1220. // write_args.buf = buf;
  1221. // write_args.enter_ts = bpf_ktime_get_ns();
  1222. // active_write_args_map__update(&id, &write_args);
  1223. //
  1224. // return 0;
  1225. //}
  1226. //
  1227. //// /sys/kernel/debug/tracing/events/syscalls/sys_exit_write/format
  1228. //TPPROG(sys_exit_write) (struct syscall_comm_exit_ctx *ctx) {
  1229. // __u64 id = bpf_get_current_pid_tgid();
  1230. // ssize_t bytes_count = ctx->ret;
  1231. // // Unstash arguments, and process syscall.
  1232. // struct data_args_t* write_args = active_write_args_map__lookup(&id);
  1233. // // Don't process FD 0-2 to avoid STDIN, STDOUT, STDERR.
  1234. // if (write_args != NULL && write_args->fd > 2) {
  1235. // write_args->bytes_count = bytes_count;
  1236. // process_syscall_data((struct pt_regs *)ctx, id, T_EGRESS, write_args, bytes_count);
  1237. // }
  1238. //
  1239. // active_write_args_map__delete(&id);
  1240. // return 0;
  1241. //}
  1242. //
  1243. //// ssize_t read(int fd, void *buf, size_t count);
  1244. //TPPROG(sys_enter_read) (struct syscall_comm_enter_ctx *ctx) {
  1245. // __u64 id = bpf_get_current_pid_tgid();
  1246. // int fd = (int)ctx->fd;
  1247. // char *buf = (char *)ctx->buf;
  1248. // // Stash arguments.
  1249. // struct data_args_t read_args = {};
  1250. // read_args.source_fn = SYSCALL_FUNC_READ;
  1251. // read_args.fd = fd;
  1252. // read_args.buf = buf;
  1253. // read_args.enter_ts = bpf_ktime_get_ns();
  1254. // read_args.tcp_seq = get_tcp_read_seq_from_fd(fd);
  1255. // active_read_args_map__update(&id, &read_args);
  1256. //
  1257. // return 0;
  1258. //}
  1259. //
  1260. //// /sys/kernel/debug/tracing/events/syscalls/sys_exit_read/format
  1261. //TPPROG(sys_exit_read) (struct syscall_comm_exit_ctx *ctx) {
  1262. // __u64 id = bpf_get_current_pid_tgid();
  1263. // ssize_t bytes_count = ctx->ret;
  1264. // // Unstash arguments, and process syscall.
  1265. // struct data_args_t* read_args = active_read_args_map__lookup(&id);
  1266. // // Don't process FD 0-2 to avoid STDIN, STDOUT, STDERR.
  1267. // if (read_args != NULL && read_args->fd > 2) {
  1268. // read_args->bytes_count = bytes_count;
  1269. // process_syscall_data((struct pt_regs *)ctx, id, T_INGRESS, read_args, bytes_count);
  1270. // }
  1271. //
  1272. // active_read_args_map__delete(&id);
  1273. // return 0;
  1274. //}
  1275. //
  1276. //// ssize_t sendto(int sockfd, const void *buf, size_t len, int flags,
  1277. //// const struct sockaddr *dest_addr, socklen_t addrlen);
  1278. //TPPROG(sys_enter_sendto) (struct syscall_comm_enter_ctx *ctx) {
  1279. // __u64 id = bpf_get_current_pid_tgid();
  1280. // int sockfd = (int)ctx->fd;
  1281. // char *buf = (char *)ctx->buf;
  1282. // // Stash arguments.
  1283. // struct data_args_t write_args = {};
  1284. // write_args.source_fn = SYSCALL_FUNC_SENDTO;
  1285. // write_args.fd = sockfd;
  1286. // write_args.buf = buf;
  1287. // write_args.enter_ts = bpf_ktime_get_ns();
  1288. // active_write_args_map__update(&id, &write_args);
  1289. //
  1290. // return 0;
  1291. //}
  1292. //
  1293. //// /sys/kernel/debug/tracing/events/syscalls/sys_exit_sendto/format
  1294. //TPPROG(sys_exit_sendto) (struct syscall_comm_exit_ctx *ctx) {
  1295. // __u64 id = bpf_get_current_pid_tgid();
  1296. // ssize_t bytes_count = ctx->ret;
  1297. //
  1298. // // 潜在的问题:如果sentto() addr是由TCP连接提供的,系统调用可能会忽略它,但我们仍然会跟踪它。在实践中,TCP连接不应该使用带addr参数的sendto()。
  1299. // // 在手册页中:
  1300. // // 如果sendto()用于连接模式(SOCK_STREAM, SOCK_SEQPACKET)套接字,参数
  1301. // // dest_addr和addrlen会被忽略(如果不是,可能会返回EISCONN错误空和0)
  1302. // //
  1303. // // Unstash arguments, and process syscall.
  1304. // struct data_args_t* write_args = active_write_args_map__lookup(&id);
  1305. // if (write_args != NULL) {
  1306. // write_args->bytes_count = bytes_count;
  1307. // process_syscall_data((struct pt_regs*)ctx, id, T_EGRESS, write_args, bytes_count);
  1308. // active_write_args_map__delete(&id);
  1309. // }
  1310. //
  1311. // return 0;
  1312. //}
  1313. //
  1314. //// ssize_t recvfrom(int sockfd, void *buf, size_t len, int flags,
  1315. //// struct sockaddr *src_addr, socklen_t *addrlen);
  1316. //TPPROG(sys_enter_recvfrom) (struct syscall_comm_enter_ctx *ctx) {
  1317. // // If flags contains MSG_PEEK, it is returned directly.
  1318. // // ref : https://linux.die.net/man/2/recvfrom
  1319. // if (ctx->flags & MSG_PEEK)
  1320. // return 0;
  1321. // __u64 id = bpf_get_current_pid_tgid();
  1322. // int sockfd = (int)ctx->fd;
  1323. // char *buf = (char *)ctx->buf;
  1324. // // Stash arguments.
  1325. // struct data_args_t read_args = {};
  1326. // read_args.source_fn = SYSCALL_FUNC_RECVFROM;
  1327. // read_args.fd = sockfd;
  1328. // read_args.buf = buf;
  1329. // read_args.enter_ts = bpf_ktime_get_ns();
  1330. // read_args.tcp_seq = get_tcp_read_seq_from_fd(sockfd);
  1331. // active_read_args_map__update(&id, &read_args);
  1332. //
  1333. // return 0;
  1334. //}
  1335. //
  1336. //// /sys/kernel/debug/tracing/events/syscalls/sys_exit_recvfrom/format
  1337. //TPPROG(sys_exit_recvfrom) (struct syscall_comm_exit_ctx *ctx) {
  1338. // __u64 id = bpf_get_current_pid_tgid();
  1339. // ssize_t bytes_count = ctx->ret;
  1340. //
  1341. // // Unstash arguments, and process syscall.
  1342. // struct data_args_t* read_args = active_read_args_map__lookup(&id);
  1343. // if (read_args != NULL) {
  1344. // read_args->bytes_count = bytes_count;
  1345. // process_syscall_data((struct pt_regs *)ctx, id, T_INGRESS, read_args, bytes_count);
  1346. // active_read_args_map__delete(&id);
  1347. // }
  1348. //
  1349. // return 0;
  1350. //}
  1351. //
  1352. //// ssize_t sendmsg(int sockfd, const struct msghdr *msg, int flags);
  1353. //KPROG(__sys_sendmsg) (struct pt_regs* ctx) {
  1354. // __u64 id = bpf_get_current_pid_tgid();
  1355. // int sockfd = (int)PT_REGS_PARM1(ctx);
  1356. // struct user_msghdr *msghdr_ptr = (struct user_msghdr *)PT_REGS_PARM2(ctx);
  1357. //
  1358. // if (msghdr_ptr != NULL) {
  1359. // // Stash arguments.
  1360. // struct user_msghdr *msghdr, __msghdr;
  1361. // bpf_probe_read(&__msghdr, sizeof(__msghdr), msghdr_ptr);
  1362. // msghdr = &__msghdr;
  1363. // // Stash arguments.
  1364. // struct data_args_t write_args = {};
  1365. // write_args.source_fn = SYSCALL_FUNC_SENDMSG;
  1366. // write_args.fd = sockfd;
  1367. // write_args.iov = msghdr->msg_iov;
  1368. // write_args.iovlen = msghdr->msg_iovlen;
  1369. // write_args.enter_ts = bpf_ktime_get_ns();
  1370. // active_write_args_map__update(&id, &write_args);
  1371. // }
  1372. //
  1373. // return 0;
  1374. //}
  1375. //
  1376. //// /sys/kernel/debug/tracing/events/syscalls/sys_exit_sendmsg/format
  1377. //TPPROG(sys_exit_sendmsg) (struct syscall_comm_exit_ctx *ctx) {
  1378. // __u64 id = bpf_get_current_pid_tgid();
  1379. // ssize_t bytes_count = ctx->ret;
  1380. // // Unstash arguments, and process syscall.
  1381. // struct data_args_t* write_args = active_write_args_map__lookup(&id);
  1382. // if (write_args != NULL) {
  1383. // write_args->bytes_count = bytes_count;
  1384. // process_syscall_data_vecs((struct pt_regs *)ctx, id, T_EGRESS, write_args, bytes_count);
  1385. // active_write_args_map__delete(&id);
  1386. // }
  1387. //
  1388. // return 0;
  1389. //}
  1390. //
  1391. //// int sendmmsg(int sockfd, struct mmsghdr *msgvec, unsigned int vlen,
  1392. //// int flags);
  1393. //KPROG(__sys_sendmmsg)(struct pt_regs* ctx) {
  1394. // __u64 id = bpf_get_current_pid_tgid();
  1395. // int sockfd = (int)PT_REGS_PARM1(ctx);
  1396. // struct mmsghdr *msgvec_ptr = (struct mmsghdr *)PT_REGS_PARM2(ctx);
  1397. // unsigned int vlen = (unsigned int)PT_REGS_PARM3(ctx);
  1398. //
  1399. // if (msgvec_ptr != NULL && vlen >= 1) {
  1400. // struct mmsghdr *msgvec, __msgvec;
  1401. // bpf_probe_read(&__msgvec, sizeof(__msgvec), msgvec_ptr);
  1402. // msgvec = &__msgvec;
  1403. // // Stash arguments.
  1404. // struct data_args_t write_args = {};
  1405. // write_args.source_fn = SYSCALL_FUNC_SENDMMSG;
  1406. // write_args.fd = sockfd;
  1407. // write_args.iov = msgvec[0].msg_hdr.msg_iov;
  1408. // write_args.iovlen = msgvec[0].msg_hdr.msg_iovlen;
  1409. // write_args.msg_len = (void *)msgvec_ptr + offsetof(typeof(struct mmsghdr), msg_len); //&msgvec[0].msg_len;
  1410. // write_args.enter_ts = bpf_ktime_get_ns();
  1411. // active_write_args_map__update(&id, &write_args);
  1412. // }
  1413. //
  1414. // return 0;
  1415. //}
  1416. //
  1417. //// /sys/kernel/debug/tracing/events/syscalls/sys_exit_sendmmsg/format
  1418. //TPPROG(sys_exit_sendmmsg) (struct syscall_comm_exit_ctx *ctx) {
  1419. // __u64 id = bpf_get_current_pid_tgid();
  1420. //
  1421. // int num_msgs = ctx->ret;
  1422. //
  1423. // // Unstash arguments, and process syscall.
  1424. // struct data_args_t* write_args = active_write_args_map__lookup(&id);
  1425. // if (write_args != NULL && num_msgs > 0) {
  1426. // ssize_t bytes_count;
  1427. // bpf_probe_read(&bytes_count, sizeof(write_args->msg_len), write_args->msg_len);
  1428. // process_syscall_data_vecs((struct pt_regs *)ctx, id, T_EGRESS, write_args, bytes_count);
  1429. // }
  1430. // active_write_args_map__delete(&id);
  1431. //
  1432. // return 0;
  1433. //}
  1434. //
  1435. //// BSD recvmsg interface
  1436. //// long __sys_recvmsg(int fd, struct user_msghdr __user *msg, unsigned int flags,
  1437. //// bool forbid_cmsg_compat)
  1438. //// ssize_t recvmsg(int sockfd, struct msghdr *msg, int flags);
  1439. //KPROG(__sys_recvmsg) (struct pt_regs* ctx) {
  1440. // int flags = (int) PT_REGS_PARM3(ctx);
  1441. // if (flags & MSG_PEEK)
  1442. // return 0;
  1443. //
  1444. // __u64 id = bpf_get_current_pid_tgid();
  1445. // struct user_msghdr __msg, *msghdr = (struct user_msghdr *)PT_REGS_PARM2(ctx);
  1446. // int sockfd = (int) PT_REGS_PARM1(ctx);
  1447. //
  1448. // if (msghdr != NULL) {
  1449. // bpf_probe_read(&__msg, sizeof(__msg), (void *)msghdr);
  1450. // msghdr = &__msg;
  1451. // // Stash arguments.
  1452. // struct data_args_t read_args = {};
  1453. // read_args.source_fn = SYSCALL_FUNC_RECVMSG;
  1454. // read_args.fd = sockfd;
  1455. // read_args.iov = msghdr->msg_iov;
  1456. // read_args.iovlen = msghdr->msg_iovlen;
  1457. // read_args.enter_ts = bpf_ktime_get_ns();
  1458. // read_args.tcp_seq = get_tcp_read_seq_from_fd(sockfd);
  1459. // active_read_args_map__update(&id, &read_args);
  1460. // }
  1461. //
  1462. // return 0;
  1463. //}
  1464. //
  1465. //// /sys/kernel/debug/tracing/events/syscalls/sys_exit_recvmsg/format
  1466. //TPPROG(sys_exit_recvmsg) (struct syscall_comm_exit_ctx *ctx) {
  1467. // __u64 id = bpf_get_current_pid_tgid();
  1468. // ssize_t bytes_count = ctx->ret;
  1469. // // Unstash arguments, and process syscall.
  1470. // struct data_args_t* read_args = active_read_args_map__lookup(&id);
  1471. // if (read_args != NULL) {
  1472. // read_args->bytes_count = bytes_count;
  1473. // process_syscall_data_vecs((struct pt_regs *)ctx, id, T_INGRESS, read_args, bytes_count);
  1474. // active_read_args_map__delete(&id);
  1475. // }
  1476. //
  1477. // return 0;
  1478. //}
  1479. //
  1480. //// int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
  1481. //// unsigned int flags, struct timespec *timeout)
  1482. //KPROG(__sys_recvmmsg) (struct pt_regs* ctx) {
  1483. // int flags = (int) PT_REGS_PARM4(ctx);
  1484. // if (flags & MSG_PEEK)
  1485. // return 0;
  1486. //
  1487. // __u64 id = bpf_get_current_pid_tgid();
  1488. // int sockfd = (int)PT_REGS_PARM1(ctx);
  1489. // struct mmsghdr *msgvec = (struct mmsghdr *)PT_REGS_PARM2(ctx);
  1490. // unsigned int vlen = (unsigned int)PT_REGS_PARM3(ctx);
  1491. //
  1492. // if (msgvec != NULL && vlen >= 1) {
  1493. // int offset;
  1494. // // Stash arguments.
  1495. // struct data_args_t read_args = {};
  1496. // read_args.source_fn = SYSCALL_FUNC_RECVMMSG;
  1497. // read_args.fd = sockfd;
  1498. // read_args.enter_ts = bpf_ktime_get_ns();
  1499. //
  1500. // offset = offsetof(typeof(struct mmsghdr), msg_hdr) +
  1501. // offsetof(typeof(struct user_msghdr), msg_iov);
  1502. //
  1503. // bpf_probe_read(&read_args.iov, sizeof(read_args.iov), (void *)msgvec + offset);
  1504. //
  1505. // offset = offsetof(typeof(struct mmsghdr), msg_hdr) +
  1506. // offsetof(typeof(struct user_msghdr), msg_iovlen);
  1507. //
  1508. // bpf_probe_read(&read_args.iovlen, sizeof(read_args.iovlen), (void *)msgvec + offset);
  1509. //
  1510. // read_args.msg_len = (void *)msgvec + offsetof(typeof(struct mmsghdr), msg_len);
  1511. // read_args.tcp_seq = get_tcp_read_seq_from_fd(sockfd);
  1512. // active_read_args_map__update(&id, &read_args);
  1513. // }
  1514. //
  1515. // return 0;
  1516. //}
  1517. //
  1518. //// /sys/kernel/debug/tracing/events/syscalls/sys_exit_recvmmsg/format
  1519. //TPPROG(sys_exit_recvmmsg) (struct syscall_comm_exit_ctx *ctx) {
  1520. // __u64 id = bpf_get_current_pid_tgid();
  1521. // int num_msgs = ctx->ret;
  1522. // // Unstash arguments, and process syscall.
  1523. // struct data_args_t* read_args = active_read_args_map__lookup(&id);
  1524. // if (read_args != NULL && num_msgs > 0) {
  1525. // ssize_t bytes_count;
  1526. // bpf_probe_read(&bytes_count, sizeof(read_args->msg_len), read_args->msg_len);
  1527. // process_syscall_data_vecs((struct pt_regs *)ctx, id, T_INGRESS, read_args, bytes_count);
  1528. // }
  1529. // active_read_args_map__delete(&id);
  1530. //
  1531. // return 0;
  1532. //}
  1533. //
  1534. ////static ssize_t do_writev(unsigned long fd, const struct iovec __user *vec,
  1535. //// unsigned long vlen, rwf_t flags)
  1536. //// ssize_t writev(int fd, const struct iovec *iov, int iovcnt);
  1537. //KPROG(do_writev) (struct pt_regs* ctx) {
  1538. // __u64 id = bpf_get_current_pid_tgid();
  1539. // int fd = (int)PT_REGS_PARM1(ctx);
  1540. // struct iovec *iov = (struct iovec *)PT_REGS_PARM2(ctx);
  1541. // int iovlen = (int)PT_REGS_PARM3(ctx);
  1542. //
  1543. // // Stash arguments.
  1544. // struct data_args_t write_args = {};
  1545. // write_args.source_fn = SYSCALL_FUNC_WRITEV;
  1546. // write_args.fd = fd;
  1547. // write_args.iov = iov;
  1548. // write_args.iovlen = iovlen;
  1549. // write_args.enter_ts = bpf_ktime_get_ns();
  1550. // active_write_args_map__update(&id, &write_args);
  1551. // return 0;
  1552. //}
  1553. //
  1554. //// /sys/kernel/debug/tracing/events/syscalls/sys_exit_writev/format
  1555. //TPPROG(sys_exit_writev) (struct syscall_comm_exit_ctx *ctx) {
  1556. // __u64 id = bpf_get_current_pid_tgid();
  1557. // ssize_t bytes_count = ctx->ret;
  1558. //
  1559. // // Unstash arguments, and process syscall.
  1560. // struct data_args_t* write_args = active_write_args_map__lookup(&id);
  1561. // if (write_args != NULL) {
  1562. // write_args->bytes_count = bytes_count;
  1563. // process_syscall_data_vecs((struct pt_regs *)ctx, id, T_EGRESS, write_args, bytes_count);
  1564. // }
  1565. //
  1566. // active_write_args_map__delete(&id);
  1567. // return 0;
  1568. //}
  1569. //
  1570. //// ssize_t readv(int fd, const struct iovec *iov, int iovcnt);
  1571. //KPROG(do_readv) (struct pt_regs* ctx) {
  1572. // __u64 id = bpf_get_current_pid_tgid();
  1573. // int fd = (int)PT_REGS_PARM1(ctx);
  1574. // struct iovec *iov = (struct iovec *)PT_REGS_PARM2(ctx);
  1575. // int iovlen = (int)PT_REGS_PARM3(ctx);
  1576. //
  1577. // // Stash arguments.
  1578. // struct data_args_t read_args = {};
  1579. // read_args.source_fn = SYSCALL_FUNC_READV;
  1580. // read_args.fd = fd;
  1581. // read_args.iov = iov;
  1582. // read_args.iovlen = iovlen;
  1583. // read_args.enter_ts = bpf_ktime_get_ns();
  1584. // read_args.tcp_seq = get_tcp_read_seq_from_fd(fd);
  1585. // active_read_args_map__update(&id, &read_args);
  1586. //
  1587. // return 0;
  1588. //}
  1589. //
  1590. //// /sys/kernel/debug/tracing/events/syscalls/sys_exit_readv/format
  1591. //TPPROG(sys_exit_readv) (struct syscall_comm_exit_ctx *ctx) {
  1592. // __u64 id = bpf_get_current_pid_tgid();
  1593. // ssize_t bytes_count = ctx->ret;
  1594. // struct data_args_t* read_args = active_read_args_map__lookup(&id);
  1595. // if (read_args != NULL) {
  1596. // read_args->bytes_count = bytes_count;
  1597. // process_syscall_data_vecs((struct pt_regs *)ctx, id, T_INGRESS, read_args, bytes_count);
  1598. // }
  1599. //
  1600. // active_read_args_map__delete(&id);
  1601. // return 0;
  1602. //}
  1603. //
  1604. //// /sys/kernel/debug/tracing/events/syscalls/sys_enter_close/format
  1605. //// 为什么不用tcp_fin? 主要原因要考虑UDP场景。
  1606. //TPPROG(sys_enter_close) (struct syscall_comm_enter_ctx *ctx) {
  1607. // int fd = ctx->fd;
  1608. // //Ignore stdin, stdout and stderr
  1609. // if (fd <= 2)
  1610. // return 0;
  1611. //
  1612. // __u32 k0 = 0;
  1613. // struct member_fields_offset *offset = members_offset__lookup(&k0);
  1614. // if (!offset)
  1615. // return 0;
  1616. //
  1617. // CHECK_OFFSET_READY(fd);
  1618. //
  1619. // __u64 sock_addr = (__u64)get_socket_from_fd(fd, offset);
  1620. // if (sock_addr) {
  1621. // __u64 conn_key = gen_conn_key_id(bpf_get_current_pid_tgid() >> 32, (__u64)fd);
  1622. // struct socket_info_t *socket_info_ptr = socket_info_map__lookup(&conn_key);
  1623. // if (socket_info_ptr != NULL)
  1624. // delete_socket_info(conn_key, socket_info_ptr);
  1625. // }
  1626. //
  1627. // return 0;
  1628. //}
  1629. //
  1630. //// /sys/kernel/debug/tracing/events/syscalls/sys_enter_getppid
  1631. //// 此处tracepoint用于周期性的将驻留在缓存中还未发送的数据发给用户态接收程序处理。
  1632. //TPPROG(sys_enter_getppid) (struct syscall_comm_enter_ctx *ctx) {
  1633. // int k0 = 0;
  1634. // struct __socket_data_buffer *v_buff = bpf_map_lookup_elem(&NAME(data_buf), &k0);
  1635. // if (v_buff) {
  1636. // if (v_buff->events_num > 0) {
  1637. // struct __socket_data *v = (struct __socket_data *)&v_buff->data[0];
  1638. // if ((bpf_ktime_get_ns() - v->timestamp * NS_PER_US) > NS_PER_SEC) {
  1639. // __u32 buf_size = (v_buff->len +
  1640. // offsetof(typeof(struct __socket_data_buffer), data))
  1641. // & (sizeof(*v_buff) - 1);
  1642. // if (buf_size >= sizeof(*v_buff))
  1643. // bpf_perf_event_output(ctx, &NAME(socket_data),
  1644. // BPF_F_CURRENT_CPU, v_buff,
  1645. // sizeof(*v_buff));
  1646. // else
  1647. // /* 使用'buf_size + 1'代替'buf_size',来规避(Linux 4.14.x)长度检查 */
  1648. // bpf_perf_event_output(ctx, &NAME(socket_data),
  1649. // BPF_F_CURRENT_CPU, v_buff,
  1650. // buf_size + 1);
  1651. //
  1652. // v_buff->events_num = 0;
  1653. // v_buff->len = 0;
  1654. // }
  1655. // }
  1656. // }
  1657. //
  1658. // return 0;
  1659. //}
  1660. //
  1661. //// /sys/kernel/debug/tracing/events/syscalls/sys_exit_socket/format
  1662. //TPPROG(sys_exit_socket) (struct syscall_comm_exit_ctx *ctx) {
  1663. // __u64 id = bpf_get_current_pid_tgid();
  1664. // __u64 fd = (__u64)ctx->ret;
  1665. // char comm[TASK_COMM_LEN];
  1666. // bpf_get_current_comm(comm, sizeof(comm));
  1667. //
  1668. // // 试用于nginx负载均衡场景
  1669. // if (!(comm[0] == 'n' && comm[1] == 'g' && comm[2] == 'i' &&
  1670. // comm[3] == 'n' && comm[4] == 'x' && comm[5] == '\0'))
  1671. // return 0;
  1672. //
  1673. // // nginx is not a go process, disable go tracking
  1674. // struct trace_key_t key = get_trace_key(0, true);
  1675. // struct trace_info_t *trace = trace_map__lookup(&key);
  1676. // if (trace && trace->peer_fd != 0 && trace->peer_fd != (__u32)fd) {
  1677. // struct socket_info_t sk_info = { 0 };
  1678. // sk_info.peer_fd = trace->peer_fd;
  1679. // sk_info.trace_id = trace->thread_trace_id;
  1680. // __u64 conn_key = gen_conn_key_id(id >> 32, fd);
  1681. // int ret = socket_info_map__update(&conn_key, &sk_info);
  1682. // __u32 k0 = 0;
  1683. // struct trace_stats *trace_stats = trace_stats_map__lookup(&k0);
  1684. // if (trace_stats == NULL)
  1685. // return 0;
  1686. // if (ret == 0) {
  1687. // __sync_fetch_and_add(&trace_stats->
  1688. // socket_map_count, 1);
  1689. // }
  1690. // }
  1691. //
  1692. // return 0;
  1693. //}
  1694. // Store IO event information
  1695. MAP_PERARRAY(io_event_buffer, __u32, struct __io_event_buffer, 1)
  1696. /*
  1697. * This eBPF program is specially used to transmit data to the agent. The purpose
  1698. * of this is to solve the problem that the number of instructions exceeds the limit.
  1699. */
  1700. static __inline int output_data_common(void *ctx) {
  1701. __u64 id = bpf_get_current_pid_tgid();
  1702. enum traffic_direction dir;
  1703. bool vecs = false;
  1704. int max_size = 0;
  1705. __u32 k0 = 0;
  1706. char *buffer = NULL;
  1707. struct __socket_data_buffer *v_buff = bpf_map_lookup_elem(&NAME(data_buf), &k0);
  1708. if (!v_buff)
  1709. goto clear_args_map_2;
  1710. struct tail_calls_context *context =
  1711. (struct tail_calls_context *)(v_buff->data + v_buff->len +
  1712. offsetof(typeof(struct __socket_data),
  1713. data));
  1714. if ((v_buff->len + offsetof(typeof(struct __socket_data), data) +
  1715. sizeof(struct tail_calls_context)) > sizeof(v_buff->data)) {
  1716. goto clear_args_map_2;
  1717. }
  1718. dir = context->dir;
  1719. vecs = context->vecs;
  1720. max_size = context->max_size_limit;
  1721. struct data_args_t *args;
  1722. if (dir == T_INGRESS)
  1723. args = active_read_args_map__lookup(&id);
  1724. else
  1725. args = active_write_args_map__lookup(&id);
  1726. if (args == NULL)
  1727. goto clear_args_map_1;
  1728. struct __socket_data *v =
  1729. (struct __socket_data *)(v_buff->data + v_buff->len);
  1730. if (v_buff->len > (sizeof(v_buff->data) - sizeof(*v)))
  1731. goto clear_args_map_1;
  1732. if (v->source == DATA_SOURCE_IO_EVENT) {
  1733. buffer = (char *)io_event_buffer__lookup(&k0);
  1734. if (buffer == NULL) {
  1735. goto clear_args_map_1;
  1736. }
  1737. } else {
  1738. buffer = (char *)args->buf;
  1739. }
  1740. __u32 __len = v->syscall_len > max_size ? max_size : v->syscall_len;
  1741. /*
  1742. * the bitwise AND operation will set the range of possible values for
  1743. * the UNKNOWN_VALUE register to [0, BUFSIZE)
  1744. */
  1745. __u32 len = __len & (sizeof(v->data) - 1);
  1746. if (vecs) {
  1747. len = iovecs_copy(v, v_buff, args, v->syscall_len, len);
  1748. } else {
  1749. if (__len >= sizeof(v->data)) {
  1750. if (unlikely(bpf_probe_read(v->data, sizeof(v->data), buffer) != 0))
  1751. goto clear_args_map_1;
  1752. len = sizeof(v->data);
  1753. } else {
  1754. /*
  1755. * https://elixir.bootlin.com/linux/v4.14/source/kernel/bpf/verifier.c#812
  1756. * __check_map_access() 触发条件检查(size <= 0)
  1757. * ```
  1758. * if (off < 0 || size <= 0 || off + size > map->value_size)
  1759. * ```
  1760. * "invalid access to map value, value_size=10888 off=135 size=0"
  1761. * 使用'len + 1'代替'len',来规避(Linux 4.14.x)这个检查。
  1762. */
  1763. if (unlikely(bpf_probe_read(v->data,
  1764. len + 1,
  1765. buffer) != 0))
  1766. goto clear_args_map_1;
  1767. }
  1768. }
  1769. v->data_len = len;
  1770. debug("start=======================");
  1771. debug("Pid: %u", v->pid);
  1772. debug("Tgid: %u", v->tgid);
  1773. debug("CoroutineID: %llu", v->coroutine_id);
  1774. debug("Source: %u", v->source);
  1775. debug("Comm: %s", v->comm);
  1776. debug("SocketID: %llu", v->socket_id);
  1777. debug("ExtraData: %u", v->extra_data);
  1778. debug("ExtraDataCount: %u", v->extra_data_count);
  1779. debug("TCPSeq: %u", v->tcp_seq);
  1780. debug("ThreadTraceID: %llu", v->thread_trace_id);
  1781. debug("Timestamp: %llu", v->timestamp);
  1782. debug("Direction: %u", v->direction);
  1783. debug("MsgType: %u", v->msg_type);
  1784. debug("SyscallLen: %llu", v->syscall_len);
  1785. debug("DataSeq: %llu", v->data_seq);
  1786. debug("DataType: %u", v->data_type);
  1787. debug("DataLen: %u", v->data_len);
  1788. // debug("data: %s", v->data);
  1789. // for (size_t i = 0; i < v->data_len; ++i) {
  1790. // debug("%02x ", (unsigned char)v->data[i]);
  1791. // if ((i + 1) % 16 == 0) {
  1792. // debug("\n");
  1793. // }
  1794. // }
  1795. debug("=======================end\n");
  1796. v_buff->len += offsetof(typeof(struct __socket_data), data) + v->data_len;
  1797. v_buff->events_num++;
  1798. if (v_buff->events_num >= EVENT_BURST_NUM ||
  1799. ((sizeof(v_buff->data) - v_buff->len) < sizeof(*v))) {
  1800. __u32 buf_size = (v_buff->len + offsetof(typeof(struct __socket_data_buffer), data))
  1801. & (sizeof(*v_buff) - 1);
  1802. if (buf_size >= sizeof(*v_buff))
  1803. bpf_perf_event_output(ctx, &NAME(socket_data),
  1804. BPF_F_CURRENT_CPU, v_buff,
  1805. sizeof(*v_buff));
  1806. else
  1807. /* 使用'buf_size + 1'代替'buf_size',来规避(Linux 4.14.x)长度检查 */
  1808. bpf_perf_event_output(ctx, &NAME(socket_data),
  1809. BPF_F_CURRENT_CPU, v_buff,
  1810. buf_size + 1);
  1811. v_buff->events_num = 0;
  1812. v_buff->len = 0;
  1813. }
  1814. clear_args_map_1:
  1815. if (dir == T_INGRESS)
  1816. active_read_args_map__delete(&id);
  1817. else
  1818. active_write_args_map__delete(&id);
  1819. return 0;
  1820. clear_args_map_2:
  1821. active_read_args_map__delete(&id);
  1822. active_write_args_map__delete(&id);
  1823. return 0;
  1824. }
  1825. PROGTP(output_data) (void *ctx)
  1826. {
  1827. return output_data_common(ctx);
  1828. }
  1829. PROGKP(output_data) (void *ctx)
  1830. {
  1831. return output_data_common(ctx);
  1832. }
  1833. static __inline int data_submit(void *ctx)
  1834. {
  1835. int ret = 0;
  1836. __u32 k0 = 0;
  1837. struct ctx_info_s *ctx_map =
  1838. bpf_map_lookup_elem(&NAME(ctx_info), &k0);
  1839. if (!ctx_map)
  1840. return SUBMIT_ABORT;
  1841. __u64 id = bpf_get_current_pid_tgid();
  1842. struct conn_info_t *conn_info;
  1843. struct conn_info_t __conn_info = ctx_map->tail_call.conn_info;
  1844. conn_info = &__conn_info;
  1845. __u64 conn_key = gen_conn_key_id(id >> 32, (__u64)conn_info->fd);
  1846. conn_info->socket_info_ptr = socket_info_map__lookup(&conn_key);
  1847. struct data_args_t *args;
  1848. if (conn_info->direction == T_INGRESS)
  1849. args = active_read_args_map__lookup(&id);
  1850. else
  1851. args = active_write_args_map__lookup(&id);
  1852. if (args == NULL)
  1853. return SUBMIT_ABORT;
  1854. const bool vecs = ctx_map->tail_call.extra.vecs;
  1855. __u32 bytes_count = ctx_map->tail_call.bytes_count;
  1856. struct member_fields_offset *offset = ctx_map->tail_call.offset;
  1857. __u64 enter_ts = args->enter_ts;
  1858. const struct process_data_extra extra = ctx_map->tail_call.extra;
  1859. ret = __data_submit(ctx, conn_info, args, vecs, bytes_count,
  1860. offset, enter_ts, &extra);
  1861. return ret;
  1862. }
  1863. PROGTP(data_submit) (void *ctx)
  1864. { int ret;
  1865. ret = data_submit(ctx);
  1866. if (ret == SUBMIT_OK) {
  1867. bpf_tail_call(ctx, &NAME(progs_jmp_tp_map),
  1868. PROG_OUTPUT_DATA_TP_IDX);
  1869. } else if (ret == SUBMIT_ABORT) {
  1870. return 0;
  1871. } else {
  1872. bpf_tail_call(ctx, &NAME(progs_jmp_tp_map),
  1873. PROG_IO_EVENT_TP_IDX);
  1874. }
  1875. return 0;
  1876. }
  1877. PROGKP(data_submit) (void *ctx)
  1878. {
  1879. int ret;
  1880. ret = data_submit(ctx);
  1881. if (ret == SUBMIT_OK) {
  1882. bpf_tail_call(ctx, &NAME(progs_jmp_kp_map),
  1883. PROG_OUTPUT_DATA_KP_IDX);
  1884. } else if (ret == SUBMIT_ABORT) {
  1885. return 0;
  1886. } else {
  1887. __u64 id = bpf_get_current_pid_tgid();
  1888. active_read_args_map__delete(&id);
  1889. active_write_args_map__delete(&id);
  1890. }
  1891. return 0;
  1892. }
  1893. static __inline bool is_regular_file(int fd)
  1894. {
  1895. __u32 k0 = 0;
  1896. struct member_fields_offset *offset = members_offset__lookup(&k0);
  1897. void *file = fd_to_file(fd, offset);
  1898. __u32 i_mode = file_to_i_mode(file, offset);
  1899. return S_ISREG(i_mode);
  1900. }
  1901. static __inline char *fd_to_name(int fd)
  1902. {
  1903. __u32 k0 = 0;
  1904. struct member_fields_offset *offset = members_offset__lookup(&k0);
  1905. void *file = fd_to_file(fd, offset);
  1906. return file_to_name(file, offset);
  1907. }
  1908. static __inline void trace_io_event_common(void *ctx,
  1909. struct data_args_t *data_args,
  1910. enum traffic_direction direction,
  1911. __u64 pid_tgid)
  1912. {
  1913. __u64 latency = 0;
  1914. __u64 trace_id = 0;
  1915. __u32 k0 = 0;
  1916. __u32 tgid = pid_tgid >> 32;
  1917. if (data_args->bytes_count == 0) {
  1918. return;
  1919. }
  1920. struct trace_conf_t *trace_conf = trace_conf_map__lookup(&k0);
  1921. if (trace_conf == NULL) {
  1922. return;
  1923. }
  1924. if (trace_conf->io_event_collect_mode == 0) {
  1925. return;
  1926. }
  1927. __u32 timeout = trace_conf->go_tracing_timeout;
  1928. struct trace_key_t trace_key = get_trace_key(timeout, false);
  1929. struct trace_info_t *trace_info_ptr = trace_map__lookup(&trace_key);
  1930. if (trace_info_ptr) {
  1931. trace_id = trace_info_ptr->thread_trace_id;
  1932. }
  1933. if (trace_id == 0 && trace_conf->io_event_collect_mode == 1) {
  1934. return;
  1935. }
  1936. int data_max_sz = trace_conf->data_limit_max;
  1937. if (!is_regular_file(data_args->fd)) {
  1938. return;
  1939. }
  1940. latency = bpf_ktime_get_ns() - data_args->enter_ts;
  1941. if (latency < trace_conf->io_event_minimal_duration) {
  1942. return;
  1943. }
  1944. char *name = fd_to_name(data_args->fd);
  1945. struct __io_event_buffer *buffer = io_event_buffer__lookup(&k0);
  1946. if (!buffer) {
  1947. return;
  1948. }
  1949. buffer->bytes_count = data_args->bytes_count;
  1950. buffer->latency = latency;
  1951. buffer->operation = direction;
  1952. bpf_probe_read_str(buffer->filename, sizeof(buffer->filename), name);
  1953. buffer->filename[sizeof(buffer->filename) - 1] = '\0';
  1954. struct __socket_data_buffer *v_buff =
  1955. bpf_map_lookup_elem(&NAME(data_buf), &k0);
  1956. if (!v_buff)
  1957. return;
  1958. struct __socket_data *v = (struct __socket_data *)&v_buff->data[0];
  1959. if (v_buff->len > (sizeof(v_buff->data) - sizeof(*v)))
  1960. return;
  1961. v = (struct __socket_data *)(v_buff->data + v_buff->len);
  1962. __builtin_memset(v, 0, offsetof(typeof(struct __socket_data), data));
  1963. v->tgid = tgid;
  1964. v->pid = (__u32)pid_tgid;
  1965. v->coroutine_id = trace_key.goid;
  1966. v->timestamp = data_args->enter_ts;
  1967. v->syscall_len = sizeof(*buffer);
  1968. v->source = DATA_SOURCE_IO_EVENT;
  1969. v->thread_trace_id = trace_id;
  1970. bpf_get_current_comm(v->comm, sizeof(v->comm));
  1971. struct tail_calls_context *context =
  1972. (struct tail_calls_context *)v->data;
  1973. context->max_size_limit = data_max_sz;
  1974. context->vecs = false;
  1975. context->dir = direction;
  1976. bpf_tail_call(ctx, &NAME(progs_jmp_tp_map),
  1977. PROG_OUTPUT_DATA_TP_IDX);
  1978. return;
  1979. }
  1980. //
  1981. //PROGTP(io_event)(void *ctx)
  1982. //{
  1983. // __u64 id = bpf_get_current_pid_tgid();
  1984. //
  1985. // struct data_args_t *data_args = NULL;
  1986. //
  1987. // data_args = active_read_args_map__lookup(&id);
  1988. // if (data_args) {
  1989. // trace_io_event_common(ctx, data_args, T_INGRESS, id);
  1990. // active_read_args_map__delete(&id);
  1991. // return 0;
  1992. // }
  1993. //
  1994. // data_args = active_write_args_map__lookup(&id);
  1995. // if (data_args) {
  1996. // trace_io_event_common(ctx, data_args, T_EGRESS, id);
  1997. // active_write_args_map__delete(&id);
  1998. // return 0;
  1999. // }
  2000. //
  2001. // return 0;
  2002. //}
  2003. //Refer to the eBPF programs here
  2004. //#include "go_tls_bpf.c"
  2005. //#include "go_http2_bpf.c"
  2006. //#include "openssl_bpf.c"