feat(backend): add root cause calculation
ci-agent / build (push) Has started running

This commit is contained in:
2026-04-05 08:22:41 +03:00
parent ad9d567d2c
commit 247505a310
4 changed files with 237 additions and 7 deletions
+17 -7
View File
@@ -218,15 +218,25 @@ func main() {
jobsGroup.GET("/metrics", jobsHandlers.GetJobMetrics) jobsGroup.GET("/metrics", jobsHandlers.GetJobMetrics)
} }
// Service dependency graph (requires admin permission) // Service dependency graph
graphGroup := v1.Group("/graph") graphGroup := v1.Group("/graph")
graphGroup.Use(auth.AuthMiddleware(), handlers.RequireAdmin()) graphGroup.Use(auth.AuthMiddleware())
{ {
graphGroup.GET("", graphHandlers.GetYAML) // Read-only endpoints: GET (require view)
graphGroup.PUT("", graphHandlers.UpdateYAML) graphView := graphGroup.Group("")
graphGroup.GET("/order", graphHandlers.StartupOrder) graphView.Use(handlers.RequireView())
graphGroup.GET("/cycle", graphHandlers.CycleCheck) {
graphGroup.GET("/failure", graphHandlers.GetFailureRootCause) graphView.GET("", graphHandlers.GetYAML)
graphView.GET("/order", graphHandlers.StartupOrder)
graphView.GET("/cycle", graphHandlers.CycleCheck)
graphView.GET("/failure", graphHandlers.GetFailureRootCause)
}
// Write endpoints: PUT (require admin)
graphAdmin := graphGroup.Group("")
graphAdmin.Use(handlers.RequireAdmin())
{
graphAdmin.PUT("", graphHandlers.UpdateYAML)
}
} }
// Agent registration // Agent registration
+83
View File
@@ -1096,6 +1096,55 @@ const docTemplate = `{
} }
} }
}, },
"/graph/failure": {
"get": {
"security": [
{
"Bearer": []
}
],
"description": "Analyzes dependencies and service statuses to find the root cause of a failure",
"produces": [
"application/json"
],
"tags": [
"graph"
],
"summary": "Find failure root cause",
"parameters": [
{
"type": "string",
"description": "Node ID (agent label)",
"name": "node_id",
"in": "query"
},
{
"type": "string",
"description": "Service name",
"name": "service",
"in": "query",
"required": true
}
],
"responses": {
"200": {
"description": "OK",
"schema": {
"$ref": "#/definitions/internal_handlers.FailureRootCauseOut"
}
},
"400": {
"description": "Bad Request",
"schema": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
}
}
}
},
"/graph/order": { "/graph/order": {
"get": { "get": {
"security": [ "security": [
@@ -2897,6 +2946,23 @@ const docTemplate = `{
} }
} }
}, },
"internal_handlers.FailureRootCauseOut": {
"type": "object",
"properties": {
"affected": {
"$ref": "#/definitions/internal_handlers.ServiceStatusOut"
},
"dependency_chain": {
"type": "array",
"items": {
"type": "string"
}
},
"root_cause": {
"$ref": "#/definitions/internal_handlers.ServiceStatusOut"
}
}
},
"internal_handlers.InsertLogRequest": { "internal_handlers.InsertLogRequest": {
"type": "object", "type": "object",
"required": [ "required": [
@@ -3061,6 +3127,23 @@ const docTemplate = `{
"type": "string" "type": "string"
} }
} }
},
"internal_handlers.ServiceStatusOut": {
"type": "object",
"properties": {
"healthy": {
"type": "boolean"
},
"name": {
"type": "string"
},
"node_id": {
"type": "string"
},
"status": {
"type": "string"
}
}
} }
}, },
"securityDefinitions": { "securityDefinitions": {
+83
View File
@@ -1085,6 +1085,55 @@
} }
} }
}, },
"/graph/failure": {
"get": {
"security": [
{
"Bearer": []
}
],
"description": "Analyzes dependencies and service statuses to find the root cause of a failure",
"produces": [
"application/json"
],
"tags": [
"graph"
],
"summary": "Find failure root cause",
"parameters": [
{
"type": "string",
"description": "Node ID (agent label)",
"name": "node_id",
"in": "query"
},
{
"type": "string",
"description": "Service name",
"name": "service",
"in": "query",
"required": true
}
],
"responses": {
"200": {
"description": "OK",
"schema": {
"$ref": "#/definitions/internal_handlers.FailureRootCauseOut"
}
},
"400": {
"description": "Bad Request",
"schema": {
"type": "object",
"additionalProperties": {
"type": "string"
}
}
}
}
}
},
"/graph/order": { "/graph/order": {
"get": { "get": {
"security": [ "security": [
@@ -2886,6 +2935,23 @@
} }
} }
}, },
"internal_handlers.FailureRootCauseOut": {
"type": "object",
"properties": {
"affected": {
"$ref": "#/definitions/internal_handlers.ServiceStatusOut"
},
"dependency_chain": {
"type": "array",
"items": {
"type": "string"
}
},
"root_cause": {
"$ref": "#/definitions/internal_handlers.ServiceStatusOut"
}
}
},
"internal_handlers.InsertLogRequest": { "internal_handlers.InsertLogRequest": {
"type": "object", "type": "object",
"required": [ "required": [
@@ -3050,6 +3116,23 @@
"type": "string" "type": "string"
} }
} }
},
"internal_handlers.ServiceStatusOut": {
"type": "object",
"properties": {
"healthy": {
"type": "boolean"
},
"name": {
"type": "string"
},
"node_id": {
"type": "string"
},
"status": {
"type": "string"
}
}
} }
}, },
"securityDefinitions": { "securityDefinitions": {
+54
View File
@@ -417,6 +417,17 @@ definitions:
required: required:
- path - path
type: object type: object
internal_handlers.FailureRootCauseOut:
properties:
affected:
$ref: '#/definitions/internal_handlers.ServiceStatusOut'
dependency_chain:
items:
type: string
type: array
root_cause:
$ref: '#/definitions/internal_handlers.ServiceStatusOut'
type: object
internal_handlers.InsertLogRequest: internal_handlers.InsertLogRequest:
properties: properties:
agent: agent:
@@ -527,6 +538,17 @@ definitions:
required: required:
- token - token
type: object type: object
internal_handlers.ServiceStatusOut:
properties:
healthy:
type: boolean
name:
type: string
node_id:
type: string
status:
type: string
type: object
info: info:
contact: {} contact: {}
paths: paths:
@@ -1220,6 +1242,38 @@ paths:
summary: Check for cycles summary: Check for cycles
tags: tags:
- graph - graph
/graph/failure:
get:
description: Analyzes dependencies and service statuses to find the root cause
of a failure
parameters:
- description: Node ID (agent label)
in: query
name: node_id
type: string
- description: Service name
in: query
name: service
required: true
type: string
produces:
- application/json
responses:
"200":
description: OK
schema:
$ref: '#/definitions/internal_handlers.FailureRootCauseOut'
"400":
description: Bad Request
schema:
additionalProperties:
type: string
type: object
security:
- Bearer: []
summary: Find failure root cause
tags:
- graph
/graph/order: /graph/order:
get: get:
description: Returns the topologically sorted service startup order description: Returns the topologically sorted service startup order