root
/
ebpf


			
				
					
						
						
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204
							package l7

import (
	"encoding/binary"
	"fmt"
	"strings"

	"go.mongodb.org/mongo-driver/bson"
)

const (
	MongoOpMSG = 2013

	// MongoDB Wire Protocol OP_MSG Structure:
	// [16 bytes: header] + [4 bytes: flag_bits] + [sections...]
	mongoHeaderLength      = 16 // length(4) + request_id(4) + response_to(4) + op_code(4)
	mongoFlagBitsLength    = 4
	mongoOpCodeOffset      = 12
	mongoSectionKindLength = 1
	mongoSectionKindCmd    = 0 // Section 0: Command document (metadata)
	mongoSectionKindDocs   = 1 // Section 1: Document sequence (actual data)

	// 默认操作类型
	invalidDataResult = "<truncated>"
)

// ParseMongo 解析 MongoDB Wire Protocol (OP_MSG) 的 payload
// 返回格式：opType|section0 或 opType|section0@[section1]
func ParseMongo(payload []byte) string {

	minLength := mongoHeaderLength + mongoFlagBitsLength + mongoSectionKindLength + 4 // +4 for BSON length
	if len(payload) < minLength {
		return invalidDataResult
	}

	// 验证是否为 OP_MSG
	opCode := binary.LittleEndian.Uint32(payload[mongoOpCodeOffset:])
	if opCode != MongoOpMSG {
		return invalidDataResult
	}

	// 跳过 header (16 bytes) + flag_bits (4 bytes)
	offset := mongoHeaderLength + mongoFlagBitsLength

	var section0 string    // 命令元数据
	var section0Raw []byte // Section 0 原始 BSON 数据（用于提取操作类型）
	var section1 string    // 文档数据

	// 解析所有 sections
	for offset < len(payload) {
		if offset+mongoSectionKindLength >= len(payload) {
			break
		}
		sectionKind := payload[offset]
		offset += mongoSectionKindLength

		if sectionKind == mongoSectionKindCmd {
			// Section 0: 命令文档（元数据）
			if offset+4 > len(payload) {
				break
			}
			bsonLen := int(binary.LittleEndian.Uint32(payload[offset:]))
			if bsonLen < 5 || offset+bsonLen > len(payload) {
				break
			}
			section0Raw = payload[offset : offset+bsonLen]
			section0 = bson.Raw(section0Raw).String()

			// Section 0 是必需的，解析失败则提前返回
			if section0 == "" {
				return invalidDataResult
			}

			/*// 提取操作类型（BSON 文档的第一个字段名）
			cmd = extractMongoOpType(section0Raw)*/

			offset += bsonLen

		} else if sectionKind == mongoSectionKindDocs {
			// Section 1: 文档序列（真正的数据）
			if offset+4 > len(payload) {
				break
			}
			// Section 1 格式: [4 bytes size] + [identifier C-string] + [BSON documents]
			// 注意：size 包含了 size 字段本身的 4 字节
			section1Size := int(binary.LittleEndian.Uint32(payload[offset:]))

			// 验证：从 offset 开始需要 section1Size 字节（包含 size 字段）
			if section1Size < 5 || offset+section1Size > len(payload) {
				break
			}

			// 提取数据：跳过 size 字段的 4 字节
			section1Data := payload[offset+4 : offset+section1Size]

			// 跳过 identifier（null-terminated string）
			identifierEnd := 0
			for i, b := range section1Data {
				if b == 0 {
					identifierEnd = i + 1
					break
				}
				if i > 20 { // 防止无限循环,identifier一般不会超过20个字节
					break
				}
			}

			if identifierEnd > 0 && identifierEnd < len(section1Data) {
				// 解析 BSON 文档数组
				docsData := section1Data[identifierEnd:]
				docs := parseBSONDocuments(docsData)
				if len(docs) > 0 {
					section1 = strings.Join(docs, ", ")
				}
			}

			offset += section1Size
		} else {
			// 未知的 section kind，停止解析
			break
		}
	}

	// 构建返回结果 格式：
	//   - 有 section1: "section0@[section1]"  (例: {"insert":"users"}@[{"name":"Alice"}, ...])
	//   - 无 section1: "section0"  (例: {"find":"users","filter":{...}})
	var baseResult string
	if section1 != "" {
		// 同时包含命令和文档（批量操作）
		baseResult = fmt.Sprintf("%s@[%s]", section0, section1)
	} else {
		// 只有命令（查询、单文档操作）
		baseResult = section0
	}

	return baseResult
}

// extractMongoOpType 从 Section 0 的 BSON 文档中提取操作类型
// BSON 格式：[4 bytes: length] + [1 byte: type] + [cstring: field name] + [value] + ... + [0x00]
/*func extractMongoOpType(bsonData []byte) string {
	if len(bsonData) < 5 {
		return ""
	}
	// 跳过文档长度（前 4 字节）
	offset := 4

	// 读取第一个元素的 type（1 字节）
	if offset >= len(bsonData) {
		return ""
	}
	// elementType := bsonData[offset] // 暂不需要
	offset += 1

	// 读取第一个元素的 field name（null-terminated string）
	fieldNameStart := offset
	for offset < len(bsonData) && bsonData[offset] != 0 {
		offset++
		if offset-fieldNameStart > 50 { // 防止无限循环
			return ""
		}
	}

	if offset >= len(bsonData) {
		return ""
	}
	// 提取操作类型（字段名）,操作类型总是 BSON 文档的第一个字段，所以这里直接提取
	opType := string(bsonData[fieldNameStart:offset])

	// 过滤掉内部字段（以 $ 开头的）（理论上不会，容错）
	if strings.HasPrefix(opType, "$") {
		return ""
	}
	return opType
}*/

// parseBSONDocuments 解析连续的 BSON 文档
func parseBSONDocuments(data []byte) []string {
	var docs []string
	offset := 0

	for offset < len(data) {
		if offset+4 > len(data) {
			break
		}
		// 读取 BSON 文档长度
		bsonLen := int(binary.LittleEndian.Uint32(data[offset:]))
		if bsonLen < 5 || offset+bsonLen > len(data) {
			break
		}
		// 解析 BSON 文档
		doc := bson.Raw(data[offset : offset+bsonLen])
		docs = append(docs, doc.String())

		offset += bsonLen

		// 限制解析数量，避免过多文档, 最多解析前1个文档
		if len(docs) > 1 {
			docs = append(docs, "...")
			break
		}
	}
	return docs
}