chore: add system metrics
This commit is contained in:
@@ -200,6 +200,7 @@ func main() {
|
||||
agentsGroup.Use(auth.AuthMiddleware(), handlers.RequireManageAgent())
|
||||
{
|
||||
agentsGroup.GET("", agents.List)
|
||||
agentsGroup.GET("/system-metrics", agents.GetSystemMetrics)
|
||||
}
|
||||
|
||||
// Jobs (requires admin permission)
|
||||
|
||||
@@ -177,6 +177,37 @@ const docTemplate = `{
|
||||
}
|
||||
}
|
||||
},
|
||||
"/agents/system-metrics": {
|
||||
"get": {
|
||||
"security": [
|
||||
{
|
||||
"Bearer": []
|
||||
}
|
||||
],
|
||||
"description": "Returns CPU, RAM, disk, and network usage metrics for all connected agents",
|
||||
"consumes": [
|
||||
"application/json"
|
||||
],
|
||||
"produces": [
|
||||
"application/json"
|
||||
],
|
||||
"tags": [
|
||||
"agents"
|
||||
],
|
||||
"summary": "Get agent system metrics",
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK",
|
||||
"schema": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/definitions/internal_handlers.AgentSystemMetricsOut"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"/auth/login": {
|
||||
"post": {
|
||||
"description": "Authenticate with login and password, returns a token and permissions",
|
||||
@@ -2706,6 +2737,43 @@ const docTemplate = `{
|
||||
}
|
||||
}
|
||||
},
|
||||
"internal_handlers.AgentSystemMetricsOut": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"connected_at": {
|
||||
"type": "string",
|
||||
"example": "2026-04-04 10:30:00"
|
||||
},
|
||||
"cpu_percent": {
|
||||
"type": "number",
|
||||
"example": 45.2
|
||||
},
|
||||
"disk_percent": {
|
||||
"type": "number",
|
||||
"example": 78.9
|
||||
},
|
||||
"id": {
|
||||
"type": "string",
|
||||
"example": "agent-001"
|
||||
},
|
||||
"label": {
|
||||
"type": "string",
|
||||
"example": "web-server-1"
|
||||
},
|
||||
"memory_percent": {
|
||||
"type": "number",
|
||||
"example": 62.5
|
||||
},
|
||||
"network_rx_bytes": {
|
||||
"type": "number",
|
||||
"example": 1048576
|
||||
},
|
||||
"network_tx_bytes": {
|
||||
"type": "number",
|
||||
"example": 524288
|
||||
}
|
||||
}
|
||||
},
|
||||
"internal_handlers.CheckCmdIn": {
|
||||
"type": "object",
|
||||
"required": [
|
||||
|
||||
@@ -166,6 +166,37 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"/agents/system-metrics": {
|
||||
"get": {
|
||||
"security": [
|
||||
{
|
||||
"Bearer": []
|
||||
}
|
||||
],
|
||||
"description": "Returns CPU, RAM, disk, and network usage metrics for all connected agents",
|
||||
"consumes": [
|
||||
"application/json"
|
||||
],
|
||||
"produces": [
|
||||
"application/json"
|
||||
],
|
||||
"tags": [
|
||||
"agents"
|
||||
],
|
||||
"summary": "Get agent system metrics",
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK",
|
||||
"schema": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"$ref": "#/definitions/internal_handlers.AgentSystemMetricsOut"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"/auth/login": {
|
||||
"post": {
|
||||
"description": "Authenticate with login and password, returns a token and permissions",
|
||||
@@ -2695,6 +2726,43 @@
|
||||
}
|
||||
}
|
||||
},
|
||||
"internal_handlers.AgentSystemMetricsOut": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"connected_at": {
|
||||
"type": "string",
|
||||
"example": "2026-04-04 10:30:00"
|
||||
},
|
||||
"cpu_percent": {
|
||||
"type": "number",
|
||||
"example": 45.2
|
||||
},
|
||||
"disk_percent": {
|
||||
"type": "number",
|
||||
"example": 78.9
|
||||
},
|
||||
"id": {
|
||||
"type": "string",
|
||||
"example": "agent-001"
|
||||
},
|
||||
"label": {
|
||||
"type": "string",
|
||||
"example": "web-server-1"
|
||||
},
|
||||
"memory_percent": {
|
||||
"type": "number",
|
||||
"example": 62.5
|
||||
},
|
||||
"network_rx_bytes": {
|
||||
"type": "number",
|
||||
"example": 1048576
|
||||
},
|
||||
"network_tx_bytes": {
|
||||
"type": "number",
|
||||
"example": 524288
|
||||
}
|
||||
}
|
||||
},
|
||||
"internal_handlers.CheckCmdIn": {
|
||||
"type": "object",
|
||||
"required": [
|
||||
|
||||
@@ -374,6 +374,33 @@ definitions:
|
||||
example: agent-001
|
||||
type: string
|
||||
type: object
|
||||
internal_handlers.AgentSystemMetricsOut:
|
||||
properties:
|
||||
connected_at:
|
||||
example: "2026-04-04 10:30:00"
|
||||
type: string
|
||||
cpu_percent:
|
||||
example: 45.2
|
||||
type: number
|
||||
disk_percent:
|
||||
example: 78.9
|
||||
type: number
|
||||
id:
|
||||
example: agent-001
|
||||
type: string
|
||||
label:
|
||||
example: web-server-1
|
||||
type: string
|
||||
memory_percent:
|
||||
example: 62.5
|
||||
type: number
|
||||
network_rx_bytes:
|
||||
example: 1048576
|
||||
type: number
|
||||
network_tx_bytes:
|
||||
example: 524288
|
||||
type: number
|
||||
type: object
|
||||
internal_handlers.CheckCmdIn:
|
||||
properties:
|
||||
command:
|
||||
@@ -619,6 +646,26 @@ paths:
|
||||
summary: Create registration token
|
||||
tags:
|
||||
- agents
|
||||
/agents/system-metrics:
|
||||
get:
|
||||
consumes:
|
||||
- application/json
|
||||
description: Returns CPU, RAM, disk, and network usage metrics for all connected
|
||||
agents
|
||||
produces:
|
||||
- application/json
|
||||
responses:
|
||||
"200":
|
||||
description: OK
|
||||
schema:
|
||||
items:
|
||||
$ref: '#/definitions/internal_handlers.AgentSystemMetricsOut'
|
||||
type: array
|
||||
security:
|
||||
- Bearer: []
|
||||
summary: Get agent system metrics
|
||||
tags:
|
||||
- agents
|
||||
/auth/login:
|
||||
post:
|
||||
consumes:
|
||||
|
||||
@@ -157,3 +157,8 @@ func (c *Collector) GetAgent(name string) (*Agent, bool) {
|
||||
func (c *Collector) Agents() []*Agent {
|
||||
return c.tracker.Agents()
|
||||
}
|
||||
|
||||
// GetSystemMetrics delegates to the tracker.
|
||||
func (c *Collector) GetSystemMetrics() map[string]AgentMetricsInfo {
|
||||
return c.tracker.GetSystemMetrics()
|
||||
}
|
||||
|
||||
@@ -36,3 +36,35 @@ func (c *Collector) ReportServices(ctx context.Context, req *proto.ServicesUpdat
|
||||
|
||||
return &proto.ServicesUpdateResp{}, nil
|
||||
}
|
||||
|
||||
// ReportSystemMetrics handles system metrics update from an agent.
|
||||
// Agents send their current system metrics (CPU, RAM, disk, network).
|
||||
func (c *Collector) ReportSystemMetrics(ctx context.Context, req *proto.SystemMetrics) (*proto.SystemMetricsResp, error) {
|
||||
md, ok := metadata.FromIncomingContext(ctx)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("no metadata in context")
|
||||
}
|
||||
|
||||
whoamiVals := md["whoami"]
|
||||
if len(whoamiVals) == 0 {
|
||||
return nil, fmt.Errorf("whoami metadata missing")
|
||||
}
|
||||
agentName := whoamiVals[0]
|
||||
|
||||
metrics := SystemMetrics{
|
||||
CPUPercent: req.CpuPercent,
|
||||
MemoryPercent: req.MemoryPercent,
|
||||
DiskPercent: req.DiskPercent,
|
||||
NetworkRxBytes: req.NetworkRxBytes,
|
||||
NetworkTxBytes: req.NetworkTxBytes,
|
||||
}
|
||||
|
||||
if ok := c.tracker.UpdateSystemMetrics(agentName, metrics); ok {
|
||||
log.Printf("Updated system metrics for agent %s: CPU=%.1f%%, RAM=%.1f%%, Disk=%.1f%%",
|
||||
agentName, metrics.CPUPercent, metrics.MemoryPercent, metrics.DiskPercent)
|
||||
} else {
|
||||
log.Printf("Warning: received system metrics for unknown agent %s", agentName)
|
||||
}
|
||||
|
||||
return &proto.SystemMetricsResp{}, nil
|
||||
}
|
||||
|
||||
@@ -97,15 +97,69 @@ func (t *ConnTracker) UpdateServices(id string, services []Service) bool {
|
||||
return true
|
||||
}
|
||||
|
||||
// UpdateSystemMetrics updates the system metrics for the given agent.
|
||||
func (t *ConnTracker) UpdateSystemMetrics(id string, metrics SystemMetrics) bool {
|
||||
t.mu.Lock()
|
||||
defer t.mu.Unlock()
|
||||
agent, ok := t.agents[id]
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
agent.SystemMetrics = metrics
|
||||
return true
|
||||
}
|
||||
|
||||
// GetSystemMetrics returns system metrics for all connected agents.
|
||||
func (t *ConnTracker) GetSystemMetrics() map[string]AgentMetricsInfo {
|
||||
t.mu.RLock()
|
||||
defer t.mu.RUnlock()
|
||||
result := make(map[string]AgentMetricsInfo)
|
||||
for id, agent := range t.agents {
|
||||
result[id] = AgentMetricsInfo{
|
||||
ID: id,
|
||||
Label: agent.Label,
|
||||
ConnectedAt: agent.ConnectedAt,
|
||||
CPUPercent: agent.SystemMetrics.CPUPercent,
|
||||
MemoryPercent: agent.SystemMetrics.MemoryPercent,
|
||||
DiskPercent: agent.SystemMetrics.DiskPercent,
|
||||
NetworkRxBytes: agent.SystemMetrics.NetworkRxBytes,
|
||||
NetworkTxBytes: agent.SystemMetrics.NetworkTxBytes,
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// Service is a named service together with its current status.
type Service struct {
	// Name is the service's name.
	Name string
	// Status is the service's current status.
	Status string
}
|
||||
|
||||
// SystemMetrics holds one set of system resource readings for an agent.
type SystemMetrics struct {
	// CPU, memory and disk usage percentages.
	CPUPercent, MemoryPercent, DiskPercent float64
	// Network receive / transmit byte figures.
	// NOTE(review): whether these are cumulative counters or per-interval
	// values is not visible here — confirm against the agent.
	NetworkRxBytes, NetworkTxBytes float64
}
|
||||
|
||||
// AgentMetricsInfo pairs an agent's identifying fields (ID, Label,
// ConnectedAt) with the five readings of its SystemMetrics. Every field
// carries a snake_case json tag.
type AgentMetricsInfo struct {
	// Identity of the agent.
	ID          string    `json:"id"`
	Label       string    `json:"label"`
	ConnectedAt time.Time `json:"connected_at"`

	// Usage percentages.
	CPUPercent    float64 `json:"cpu_percent"`
	MemoryPercent float64 `json:"memory_percent"`
	DiskPercent   float64 `json:"disk_percent"`

	// Network byte figures.
	NetworkRxBytes float64 `json:"network_rx_bytes"`
	NetworkTxBytes float64 `json:"network_tx_bytes"`
}
|
||||
|
||||
// Agent represents a connected agent streaming logs to the collector.
|
||||
type Agent struct {
|
||||
ID string
|
||||
Label string
|
||||
Services []Service
|
||||
ConnectedAt time.Time
|
||||
ID string
|
||||
Label string
|
||||
Services []Service
|
||||
SystemMetrics SystemMetrics
|
||||
ConnectedAt time.Time
|
||||
}
|
||||
|
||||
@@ -51,3 +51,44 @@ func (ag *AgentsGroup) List(c *gin.Context) {
|
||||
|
||||
c.JSON(http.StatusOK, agents)
|
||||
}
|
||||
|
||||
// AgentSystemMetricsOut is the JSON shape of one agent's system metrics:
// the agent's id and label, its connection time as a preformatted string,
// three usage percentages and two network byte figures. The example tags
// supply sample values for the generated API documentation.
type AgentSystemMetricsOut struct {
	ID          string `json:"id" example:"agent-001"`
	Label       string `json:"label" example:"web-server-1"`
	ConnectedAt string `json:"connected_at" example:"2026-04-04 10:30:00"`

	CPUPercent    float64 `json:"cpu_percent" example:"45.2"`
	MemoryPercent float64 `json:"memory_percent" example:"62.5"`
	DiskPercent   float64 `json:"disk_percent" example:"78.9"`

	NetworkRxBytes float64 `json:"network_rx_bytes" example:"1048576.0"`
	NetworkTxBytes float64 `json:"network_tx_bytes" example:"524288.0"`
}
|
||||
|
||||
// GetSystemMetrics returns system load metrics for all connected agents.
|
||||
// @Summary Get agent system metrics
|
||||
// @Description Returns CPU, RAM, disk, and network usage metrics for all connected agents
|
||||
// @Tags agents
|
||||
// @Security Bearer
|
||||
// @Accept json
|
||||
// @Produce json
|
||||
// @Success 200 {array} AgentSystemMetricsOut
|
||||
// @Router /agents/system-metrics [get]
|
||||
func (ag *AgentsGroup) GetSystemMetrics(c *gin.Context) {
|
||||
metricsMap := ag.collector.GetSystemMetrics()
|
||||
|
||||
metrics := make([]AgentSystemMetricsOut, 0, len(metricsMap))
|
||||
for _, m := range metricsMap {
|
||||
metrics = append(metrics, AgentSystemMetricsOut{
|
||||
ID: m.ID,
|
||||
Label: m.Label,
|
||||
ConnectedAt: m.ConnectedAt.Format("2006-01-02 15:04:05"),
|
||||
CPUPercent: m.CPUPercent,
|
||||
MemoryPercent: m.MemoryPercent,
|
||||
DiskPercent: m.DiskPercent,
|
||||
NetworkRxBytes: m.NetworkRxBytes,
|
||||
NetworkTxBytes: m.NetworkTxBytes,
|
||||
})
|
||||
}
|
||||
|
||||
c.JSON(http.StatusOK, metrics)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user