feat(backend): implement service monitor proto & connect it to http /agents
ci-agent / build (push) Failing after 2m30s

This commit is contained in:
2026-04-04 19:59:32 +03:00
parent 1d75935a08
commit 4ea1aec6e2
8 changed files with 99 additions and 15 deletions
+18 -4
View File
@@ -23,6 +23,9 @@ const docTemplate = `{
}
],
"description": "Returns a list of all agents currently connected via Collector (log streaming)",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
@@ -1966,19 +1969,30 @@ const docTemplate = `{
"type": "object",
"properties": {
"connected_at": {
"type": "string"
"description": "Time when agent connected (RFC3339-like)",
"type": "string",
"example": "2026-04-04 10:30:00"
},
"label": {
"type": "string"
"description": "Human-readable label",
"type": "string",
"example": "web-server-1"
},
"services": {
"description": "List of services with status (format: \"name:status\")",
"type": "array",
"items": {
"type": "string"
}
},
"example": [
"nginx:running",
"redis:up"
]
},
"token": {
"type": "string"
"description": "Unique agent identifier",
"type": "string",
"example": "agent-001"
}
}
},
+18 -4
View File
@@ -12,6 +12,9 @@
}
],
"description": "Returns a list of all agents currently connected via Collector (log streaming)",
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
@@ -1955,19 +1958,30 @@
"type": "object",
"properties": {
"connected_at": {
"type": "string"
"description": "Time when agent connected (RFC3339-like)",
"type": "string",
"example": "2026-04-04 10:30:00"
},
"label": {
"type": "string"
"description": "Human-readable label",
"type": "string",
"example": "web-server-1"
},
"services": {
"description": "List of services with status (format: \"name:status\")",
"type": "array",
"items": {
"type": "string"
}
},
"example": [
"nginx:running",
"redis:up"
]
},
"token": {
"type": "string"
"description": "Unique agent identifier",
"type": "string",
"example": "agent-001"
}
}
},
+12
View File
@@ -306,14 +306,24 @@ definitions:
internal_handlers.AgentInfo:
properties:
connected_at:
description: Time when agent connected (RFC3339-like)
example: "2026-04-04 10:30:00"
type: string
label:
description: Human-readable label
example: web-server-1
type: string
services:
description: 'List of services with status (format: "name:status")'
example:
- nginx:running
- redis:up
items:
type: string
type: array
token:
description: Unique agent identifier
example: agent-001
type: string
type: object
internal_handlers.InsertLogRequest:
@@ -397,6 +407,8 @@ info:
paths:
/agents:
get:
consumes:
- application/json
description: Returns a list of all agents currently connected via Collector
(log streaming)
produces:
+1 -1
View File
@@ -3,7 +3,7 @@ module gitea.d3m0k1d.ru/d3m0k1d/HellreigN/backend
go 1.26.1
require (
gitea.d3m0k1d.ru/d3m0k1d/HellreigN/proto v0.0.0-20260403210401-a6212c89fc0e
gitea.d3m0k1d.ru/d3m0k1d/HellreigN/proto v0.0.0-20260404165608-1d75935a08a5
github.com/ClickHouse/clickhouse-go/v2 v2.44.0
github.com/gin-gonic/gin v1.12.0
github.com/samber/lo v1.53.0
@@ -186,3 +186,45 @@ func (c *Collector) Agents() []*Agent {
}
return result
}
// ServicesStream serves the client-streaming services RPC for one agent.
//
// The agent name is taken from the first "whoami" value of the incoming gRPC
// metadata; the call fails if the metadata or that key is absent. Each message
// received replaces the Services list of the agent stored under that name in
// c.agents with entries formatted as "name:status". Updates for a name that is
// not present in c.agents are logged and otherwise ignored. When the agent
// closes its side of the stream (io.EOF), a single empty ServicesUpdateResp is
// sent and the stream is closed.
func (c *Collector) ServicesStream(stream proto.Collector_ServicesStreamServer) error {
	md, hasMD := metadata.FromIncomingContext(stream.Context())
	if !hasMD {
		return fmt.Errorf("no metadata in context")
	}

	names := md["whoami"]
	if len(names) == 0 {
		return fmt.Errorf("whoami metadata missing")
	}
	agentName := names[0]

	log.Printf("Agent %s started services update stream", agentName)

	for {
		update, err := stream.Recv()
		switch {
		case err == io.EOF:
			// The agent is done sending; acknowledge once and finish.
			log.Printf("Agent %s finished services update stream", agentName)
			return stream.SendAndClose(&proto.ServicesUpdateResp{})
		case err != nil:
			return fmt.Errorf("failed to receive services update: %w", err)
		}

		// c.mu guards both the c.agents lookup and the write to the agent.
		c.mu.Lock()
		agent, known := c.agents[agentName]
		if !known {
			log.Printf("Warning: received services update for unknown agent %s", agentName)
			c.mu.Unlock()
			continue
		}

		statuses := make([]string, len(update.Services))
		for i, svc := range update.Services {
			statuses[i] = fmt.Sprintf("%s:%s", svc.Name, svc.Status)
		}
		agent.Services = statuses
		log.Printf("Updated services for agent %s: %v", agentName, agent.Services)
		c.mu.Unlock()
	}
}
+6 -4
View File
@@ -16,17 +16,19 @@ func NewAgentsGroup(h *Handlers, coll *collector.Collector) AgentsGroup {
return AgentsGroup{Handlers: h, collector: coll}
}
// AgentInfo represents a connected agent's current status.
//
// It is the JSON shape of one element in the /agents response. The `example`
// tags and the trailing field comments are kept on the fields themselves so
// the generated swagger definition carries them as examples and descriptions.
type AgentInfo struct {
	Token       string   `json:"token" example:"agent-001"`                  // Unique agent identifier
	Label       string   `json:"label" example:"web-server-1"`               // Human-readable label
	Services    []string `json:"services" example:"nginx:running,redis:up"`  // List of services with status (format: "name:status")
	ConnectedAt string   `json:"connected_at" example:"2026-04-04 10:30:00"` // Time when agent connected (RFC3339-like)
}
// @Summary Get connected agents
// @Description Returns a list of all agents currently connected via Collector (log streaming)
// @Tags agents
// @Security Bearer
// @Accept json
// @Produce json
// @Success 200 {array} AgentInfo
// @Router /agents [get]
+1 -2
View File
@@ -4,7 +4,6 @@ import (
"errors"
"fmt"
"net/http"
"os"
"os/exec"
"gitea.d3m0k1d.ru/d3m0k1d/HellreigN/backend/internal/grpcsrv/commander"
@@ -82,7 +81,7 @@ func (self *JobsHandlers) AddJob(c *gin.Context) {
return err
}
job, err := agent.WaitJob(jid)
if err != nil && !errors.Is(err, &exec.ExitError{}) {
if err != nil && !errors.As(err, &exec.ExitError{}) {
return err
}
c.JSON(http.StatusCreated, AddJobOut{
+1
View File
@@ -5,6 +5,7 @@ import (
"encoding/hex"
)
// TODO: revisit RandomToken.
func RandomToken() (string, error) {
token := make([]byte, 32)
if _, err := rand.Read(token); err != nil {