From 247505a31064a80247587c70575f80822e07c2c0 Mon Sep 17 00:00:00 2001 From: "zero@thinky" Date: Sun, 5 Apr 2026 08:22:41 +0300 Subject: [PATCH] feat(backend): add root cause calculation --- backend/cmd/main.go | 24 +++++++---- backend/docs/docs.go | 83 +++++++++++++++++++++++++++++++++++++++ backend/docs/swagger.json | 83 +++++++++++++++++++++++++++++++++++++++ backend/docs/swagger.yaml | 54 +++++++++++++++++++++++++ 4 files changed, 237 insertions(+), 7 deletions(-) diff --git a/backend/cmd/main.go b/backend/cmd/main.go index 3f9f679..0e712bf 100644 --- a/backend/cmd/main.go +++ b/backend/cmd/main.go @@ -218,15 +218,25 @@ func main() { jobsGroup.GET("/metrics", jobsHandlers.GetJobMetrics) } - // Service dependency graph (requires admin permission) + // Service dependency graph graphGroup := v1.Group("/graph") - graphGroup.Use(auth.AuthMiddleware(), handlers.RequireAdmin()) + graphGroup.Use(auth.AuthMiddleware()) { - graphGroup.GET("", graphHandlers.GetYAML) - graphGroup.PUT("", graphHandlers.UpdateYAML) - graphGroup.GET("/order", graphHandlers.StartupOrder) - graphGroup.GET("/cycle", graphHandlers.CycleCheck) - graphGroup.GET("/failure", graphHandlers.GetFailureRootCause) + // Read-only endpoints: GET (require view) + graphView := graphGroup.Group("") + graphView.Use(handlers.RequireView()) + { + graphView.GET("", graphHandlers.GetYAML) + graphView.GET("/order", graphHandlers.StartupOrder) + graphView.GET("/cycle", graphHandlers.CycleCheck) + graphView.GET("/failure", graphHandlers.GetFailureRootCause) + } + // Write endpoints: PUT (require admin) + graphAdmin := graphGroup.Group("") + graphAdmin.Use(handlers.RequireAdmin()) + { + graphAdmin.PUT("", graphHandlers.UpdateYAML) + } } // Agent registration diff --git a/backend/docs/docs.go b/backend/docs/docs.go index 3445cbd..c1fac0e 100644 --- a/backend/docs/docs.go +++ b/backend/docs/docs.go @@ -1096,6 +1096,55 @@ const docTemplate = `{ } } }, + "/graph/failure": { + "get": { + "security": [ + { + "Bearer": [] + } + ], + "description": "Analyzes dependencies and service statuses to find the root cause of a failure", + "produces": [ + "application/json" + ], + "tags": [ + "graph" + ], + "summary": "Find failure root cause", + "parameters": [ + { + "type": "string", + "description": "Node ID (agent label)", + "name": "node_id", + "in": "query" + }, + { + "type": "string", + "description": "Service name", + "name": "service", + "in": "query", + "required": true + } + ], + "responses": { + "200": { + "description": "OK", + "schema": { + "$ref": "#/definitions/internal_handlers.FailureRootCauseOut" + } + }, + "400": { + "description": "Bad Request", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + } + }, "/graph/order": { "get": { "security": [ @@ -2897,6 +2946,23 @@ const docTemplate = `{ } } }, + "internal_handlers.FailureRootCauseOut": { + "type": "object", + "properties": { + "affected": { + "$ref": "#/definitions/internal_handlers.ServiceStatusOut" + }, + "dependency_chain": { + "type": "array", + "items": { + "type": "string" + } + }, + "root_cause": { + "$ref": "#/definitions/internal_handlers.ServiceStatusOut" + } + } + }, "internal_handlers.InsertLogRequest": { "type": "object", "required": [ @@ -3061,6 +3127,23 @@ const docTemplate = `{ "type": "string" } } + }, + "internal_handlers.ServiceStatusOut": { + "type": "object", + "properties": { + "healthy": { + "type": "boolean" + }, + "name": { + "type": "string" + }, + "node_id": { + "type": "string" + }, + "status": { + "type": "string" + } + } } }, "securityDefinitions": { diff --git a/backend/docs/swagger.json b/backend/docs/swagger.json index 14e776b..ac2ee40 100644 --- a/backend/docs/swagger.json +++ b/backend/docs/swagger.json @@ -1085,6 +1085,55 @@ } } }, + "/graph/failure": { + "get": { + "security": [ + { + "Bearer": [] + } + ], + "description": "Analyzes dependencies and service statuses to find the root cause of a failure", + "produces": [ + "application/json" + ], + "tags": [ + "graph" + ], + "summary": "Find failure root cause", + "parameters": [ + { + "type": "string", + "description": "Node ID (agent label)", + "name": "node_id", + "in": "query" + }, + { + "type": "string", + "description": "Service name", + "name": "service", + "in": "query", + "required": true + } + ], + "responses": { + "200": { + "description": "OK", + "schema": { + "$ref": "#/definitions/internal_handlers.FailureRootCauseOut" + } + }, + "400": { + "description": "Bad Request", + "schema": { + "type": "object", + "additionalProperties": { + "type": "string" + } + } + } + } + } + }, "/graph/order": { "get": { "security": [ @@ -2886,6 +2935,23 @@ } } }, + "internal_handlers.FailureRootCauseOut": { + "type": "object", + "properties": { + "affected": { + "$ref": "#/definitions/internal_handlers.ServiceStatusOut" + }, + "dependency_chain": { + "type": "array", + "items": { + "type": "string" + } + }, + "root_cause": { + "$ref": "#/definitions/internal_handlers.ServiceStatusOut" + } + } + }, "internal_handlers.InsertLogRequest": { "type": "object", "required": [ @@ -3050,6 +3116,23 @@ "type": "string" } } + }, + "internal_handlers.ServiceStatusOut": { + "type": "object", + "properties": { + "healthy": { + "type": "boolean" + }, + "name": { + "type": "string" + }, + "node_id": { + "type": "string" + }, + "status": { + "type": "string" + } + } } }, "securityDefinitions": { diff --git a/backend/docs/swagger.yaml b/backend/docs/swagger.yaml index 2c2b740..ce58038 100644 --- a/backend/docs/swagger.yaml +++ b/backend/docs/swagger.yaml @@ -417,6 +417,17 @@ definitions: required: - path type: object + internal_handlers.FailureRootCauseOut: + properties: + affected: + $ref: '#/definitions/internal_handlers.ServiceStatusOut' + dependency_chain: + items: + type: string + type: array + root_cause: + $ref: '#/definitions/internal_handlers.ServiceStatusOut' + type: object internal_handlers.InsertLogRequest: properties: agent: @@ -527,6 +538,17 @@ definitions: required: - token type: object + internal_handlers.ServiceStatusOut: + properties: + healthy: + type: boolean + name: + type: string + node_id: + type: string + status: + type: string + type: object info: contact: {} paths: @@ -1220,6 +1242,38 @@ paths: summary: Check for cycles tags: - graph + /graph/failure: + get: + description: Analyzes dependencies and service statuses to find the root cause + of a failure + parameters: + - description: Node ID (agent label) + in: query + name: node_id + type: string + - description: Service name + in: query + name: service + required: true + type: string + produces: + - application/json + responses: + "200": + description: OK + schema: + $ref: '#/definitions/internal_handlers.FailureRootCauseOut' + "400": + description: Bad Request + schema: + additionalProperties: + type: string + type: object + security: + - Bearer: [] + summary: Find failure root cause + tags: + - graph /graph/order: get: description: Returns the topologically sorted service startup order