ソースを参照

Fixed #TASK_QT-9810 对接OmniAgent

Carl 1年前
コミット
9ac3f00a50

+ 1 - 0
containers/container.go

@@ -390,6 +390,7 @@ func (c *Container) onProcessStart(pid uint32) *Process {
 	defer c.lock.Unlock()
 	stats, err := TaskstatsPID(pid)
 	if err != nil {
+		klog.WithError(err).Errorf("Failed onProcessStart [%d]", pid)
 		return nil
 	}
 	c.zombieAt = time.Time{}

+ 5 - 4
dist/package_dir/bin/agentctl

@@ -132,13 +132,14 @@ getAgentPid() {
 startAgent() {
   local pid
   if [ -f "${AGENT_BIN_DIR}/${AGENT_PROC}" ]; then
-    ${AGENT_BIN_DIR}/start.sh "${AGENT_BIN_DIR}/${AGENT_PROC}"
-#    local params="server --port 31767"
+    export DISABLE_E2E_TRACING=true
+    export DISABLE_STACK_TRACING=true
+    local params="--listen=0.0.0.0:8123"
 #    if [ -f "/etc/chaosd/pki/ca.crt" ] && [ -f "/etc/chaosd/pki/chaosd.crt" ] && [ -f "/etc/chaosd/pki/chaosd.key" ]; then
 #        params="server  --https-port 31768 --CA=/etc/chaosd/pki/ca.crt --cert=/etc/chaosd/pki/chaosd.crt --key=/etc/chaosd/pki/chaosd.key"
 #    fi
-#    pid=$(nohup ${AGENT_BIN_DIR}/${AGENT_PROC} ${params}>>"${AGENT_LOG_DIR}/service.log" 2>&1 & echo $!)
-#    echo ${pid} > "${AGENT_PID_FILE}"
+    pid=$(nohup ${AGENT_BIN_DIR}/${AGENT_PROC} ${params}>>"${AGENT_LOG_DIR}/service.log" 2>&1 & echo $!)
+    echo ${pid} > "${AGENT_PID_FILE}"
   else
     echo "${AGENT_BIN_DIR}/${AGENT_PROC} not exist."
     exit "${EXIT_CODE_GENERIC_ERROR}"

+ 28 - 0
dist/scripts/install_temp.sh

@@ -2864,6 +2864,33 @@ deployFiles() {
   setupAll
   createCurrentVersionSymlink
 }
+enableRootDropping() {
+  local output=
+  if ! isAvailable setcap; then
+    toConsoleWarn "Failed to enable non-privileged mode, kernel does not support file capabilities. Set NON_ROOT_MODE=false."
+    #== writeParamToConfigFile "${CONF_FIELD_NM_NON_ROOT_MODE}" "false" "${LEGACY_AGENT_CONF_FILE}"
+    #== writeParamToConfigFile "${CONF_FIELD_NM_NON_ROOT_MODE}" "false" "${INSTALLER_CONF_FILE}"
+    #== editScriptFileParam "readonly PARAM_NON_ROOT_MODE" "false" "${AGENT_SCRIPTS_DIR}/${SERVICE_SCRIPT_FILE}"
+    return
+  fi
+
+  #== Grant file capabilities to the agent binary via setcap (background: https://www.cnblogs.com/xzongblogs/p/14106481.html). NOTE(review): 'output' is never assigned in this function, so the toLogWarn line below always logs it empty.
+  #== CAP_DAC_OVERRIDE: bypass file read, write, and execute permission checks.
+  #== CAP_FOWNER: bypass permission checks on operations that normally require the process's filesystem UID to match the file's UID (e.g. chmod(2), utime(2)), excluding operations covered by CAP_DAC_OVERRIDE and CAP_DAC_READ_SEARCH.
+  #== CAP_IPC_LOCK: allows locking memory (e.g. shared memory segments).
+  #== CAP_SYS_PTRACE: allows tracing any process.
+  #== CAP_SYS_ADMIN: access to privileged perf event information.
+  #== CAP_SYS_RESOURCE: override resource limits.
+  #== CAP_NET_ADMIN: allows performing various network-related operations.
+  commandErrorWrapper setcap cap_dac_override,cap_fowner,cap_ipc_lock,cap_sys_ptrace,cap_sys_admin,cap_sys_resource,cap_net_admin+ep "${AGENT_BIN_DIR}/${AGENT_PROC}"
+  local setCapCwServerAgentExitCode=$?
+  if [ ${setCapCwServerAgentExitCode} -eq 0 ] ; then
+    toConsoleInfo "Set file capabilities [${AGENT_PROC}]"
+  else
+    toConsoleWarn "Failed to enable non-privileged mode. Exit Code : ${setCapCwServerAgentExitCode} . For details, see: ${LOG_FILE}"
+    toLogWarn "Set file capabilities output: ${output}"
+  fi
+}
 
 #== 【5】=配置安装
 configureInstallation() {
@@ -2885,6 +2912,7 @@ configureInstallation() {
       changeFilesOwnership
     fi
     #== 设置root权限
+    enableRootDropping
   fi
   #== 系统配置(策略配置、dump proc、agent 进程配置、自动启动)
 #  setupAutostart

+ 7 - 2
dist/scripts/uninstall.sh

@@ -21,7 +21,7 @@ readonly BRAND_AGENT_DEFAULT_USER_AND_GROUP_NAME="cloudwise"
 #== **********************************************************
 #== smartagentd
 readonly AGENT_PROC="euspace"
-readonly AGENT_INSTALLER_VERSION=0.0.1
+readonly AGENT_INSTALLER_VERSION=0.0.7
 #== **********************************************************
 #==  配置目录
 #== **********************************************************
@@ -50,6 +50,11 @@ readonly AGENT_SCRIPTS_DIR="${INSTALL_DIR}/scripts"
 #== agent 初始化脚本目录 /opt/cloudwise/omniagent/bin
 readonly AGENT_BIN_DIR="${INSTALL_DIR}/bin"
 
+readonly AGENT_AGENTS_DIR="${INSTALL_DIR}/agents"
+
+readonly AGENT_RUNTIME_DIR="${INSTALL_DIR}/runtime"
+
+
 
 readonly PERL_FILE="/usr/bin/perl"
 
@@ -1071,7 +1076,7 @@ cleanUpSELinux() {
 cleanUpFilesDuringUninstall() {
 	toLogInfo "This is a full uninstaller, cleaning up rest of the files"
 
-  local fileNames="${AGENT_CONF_DIR} ${AGENT_BIN_DIR} ${LOG_DIR} ${AGENT_SCRIPTS_DIR} ${AGENT_LIBS_DIR}"
+  local fileNames="${AGENT_CONF_DIR} ${AGENT_BIN_DIR} ${LOG_DIR} ${AGENT_SCRIPTS_DIR} ${AGENT_LIBS_DIR} ${AGENT_AGENTS_DIR} ${AGENT_RUNTIME_DIR}"
   for fileName in ${fileNames}; do
 	  removeIfExists "${fileName}"
   done

+ 3 - 4
ebpftracer/tracer/offset.go

@@ -73,7 +73,7 @@ func handleConnection(conn net.Conn) {
 		//}
 
 		request := strings.TrimSpace(string(buffer[:n]))
-		fmt.Println(request)
+		klog.Infof("[eBPF Kernel Adapt] Request received: %v", request)
 		if request == "hello" {
 			_, err := conn.Write([]byte("OK"))
 			if err != nil {
@@ -103,7 +103,7 @@ func kernelOffsetInferClient() error {
 	for {
 		n, err := conn.Read(buffer)
 		if err != nil {
-			fmt.Printf("[eBPF Kernel Adapt] Error reading response from server: %v\n", err)
+			klog.Errorf("[eBPF Kernel Adapt] Error reading from connection: %v", err)
 			break
 		}
 		if n == 0 {
@@ -111,8 +111,7 @@ func kernelOffsetInferClient() error {
 		}
 
 		response := strings.TrimSpace(string(buffer[:n]))
-		klog.Infoln(response)
-
+		klog.Infof("[eBPF Kernel Adapt] Response received: %v", response)
 		if response == "OK" {
 			break
 		}

+ 1 - 1
ebpftracer/tracer/socket.go

@@ -107,7 +107,7 @@ func insert_adapt_kern_uid_to_map(collection *ebpf.Collection) {
 	pid := os.Getpid()
 	tid := syscall.Gettid()
 	adaptKernUID := uint64(pid)<<32 | uint64(tid)
-	code, err := bpf_table_set_value(collection, MAP_ADAPT_KERN_UID_NAME, 0, uint32(adaptKernUID))
+	code, err := bpf_table_set_value(collection, MAP_ADAPT_KERN_UID_NAME, 0, adaptKernUID)
 	if err != nil || code != ETR_OK {
 		klog.Error(err, code)
 	}

+ 3 - 3
flags/flags.go

@@ -26,11 +26,11 @@ var (
 	ServerPrefix        = kingpin.Flag("server-prefix", "server-prefix").Envar("SERVER_PREFIX").Default("").String()
 	DisableRegisterHost = kingpin.Flag("disable-reg-host", "Disable reg host").Default("true").Envar("DISABLE_REG_HOST").Bool()
 	// agent
-	DisableStackTracing = kingpin.Flag("disable-stack-tracing", "Disable stack tracing").Default("false").Envar("DISABLE_STACK_TRACING").Bool()
-	DisableE2ETracing   = kingpin.Flag("disable-e2e-tracing", "Disable e2e tracing").Default("false").Envar("DISABLE_E2E_TRACING").Bool()
+	DisableE2ETracing   = kingpin.Flag("disable-e2e-tracing", "Disable e2e tracing").Default("true").Envar("DISABLE_E2E_TRACING").Bool()
+	DisableStackTracing = kingpin.Flag("disable-stack-tracing", "Disable stack tracing").Default("true").Envar("DISABLE_STACK_TRACING").Bool()
 	LicenseKey          = kingpin.Flag("license-key", "Apm API key").Default("J45Engw88NeHUZ4Q7qNsK8L47FTH**QvgW113IEnsNaBNMR5zZ**oj/g!!!!").Envar("LICENSE_KEY").String()
 
-	ListenAddress             = kingpin.Flag("listen", "Listen address - ip:port or :port").Default("0.0.0.0:80").Envar("LISTEN").String()
+	ListenAddress             = kingpin.Flag("listen", "Listen address - ip:port or :port").Default("0.0.0.0:8123").Envar("LISTEN").String()
 	CgroupRoot                = kingpin.Flag("cgroupfs-root", "The mount point of the host cgroupfs root").Default("/sys/fs/cgroup").Envar("CGROUPFS_ROOT").String()
 	DisableLogParsing         = kingpin.Flag("disable-log-parsing", "Disable container log parsing").Default("false").Envar("DISABLE_LOG_PARSING").Bool()
 	DisablePinger             = kingpin.Flag("disable-pinger", "Don't ping upstreams").Default("false").Envar("DISABLE_PINGER").Bool()

+ 8 - 2
main.go

@@ -2,6 +2,7 @@ package main
 
 import (
 	"bytes"
+	"github.com/cilium/ebpf/rlimit"
 	"github.com/coroot/coroot-node-agent/utils"
 	"github.com/coroot/coroot-node-agent/utils/enums"
 	log "github.com/sirupsen/logrus"
@@ -33,7 +34,7 @@ var (
 	version = "unknown"
 )
 
-const minSupportedKernelVersion = "4.16"
+const minSupportedKernelVersion = "4.18"
 
 func init() {
 	logs.FormatterInit()
@@ -114,6 +115,7 @@ type MetricData struct {
 }
 
 func main() {
+	runtime.GOMAXPROCS(1)
 	err := logs.InitLog(*flags.LogLevel, logs.LogConfig{
 		Path:       utils.GetDefaultLogPath(),
 		AppInfo:    enums.DaemonProc,
@@ -126,7 +128,11 @@ func main() {
 	if err != nil {
 		log.WithError(err).Errorf("log init error.")
 	}
-
+	if err := rlimit.RemoveMemlock(); err != nil {
+		log.WithError(err).Warning("Failed Removing memlock.")
+	} else {
+		log.Info("Rlimit removed")
+	}
 	//log.LogToStderr(false)
 	//log.SetOutput(&RateLimitedLogOutput{limiter: rate.NewLimiter(rate.Limit(*flags.LogPerSecond), *flags.LogBurst)})