package handlers import ( "io" "log" "net/http" "os" "strings" "sync" "gitea.d3m0k1d.ru/d3m0k1d/HellreigN/backend/internal/graph" "gitea.d3m0k1d.ru/d3m0k1d/HellreigN/backend/internal/grpcsrv/collector" "github.com/gin-gonic/gin" ) // GraphHandlers manages the service dependency graph. type GraphHandlers struct { path string mu sync.RWMutex yamlData []byte loaded *graph.Graph collector *collector.Collector } // NewGraphHandlers loads the graph from the given YAML file path. func NewGraphHandlers(yamlPath string, coll *collector.Collector) *GraphHandlers { h := &GraphHandlers{path: yamlPath, collector: coll} if err := h.reload(); err != nil { if _, ok := err.(*os.PathError); ok { log.Printf("[graph] no graph file at %q, starting with empty graph", yamlPath) h.loaded = graph.New() h.yamlData = []byte("nodes: {}\n") } else { log.Fatalf("[graph] failed to load graph from %q: %v", yamlPath, err) } } return h } func (h *GraphHandlers) reload() error { data, err := os.ReadFile(h.path) if err != nil { return err } g, err := graph.ParseYAML(data) if err != nil { return err } h.mu.Lock() h.yamlData = data h.loaded = g h.mu.Unlock() return nil } // GetGraph returns the current parsed graph. func (h *GraphHandlers) GetGraph() *graph.Graph { h.mu.RLock() defer h.mu.RUnlock() return h.loaded } // GetYAML returns the raw YAML content. // @Summary Get dependency graph YAML // @Description Returns the service dependency graph as raw YAML text // @Tags graph // @Produce plain // @Success 200 {string} string "YAML content" // @Security Bearer // @Router /graph [get] func (h *GraphHandlers) GetYAML(c *gin.Context) { h.mu.RLock() defer h.mu.RUnlock() c.Data(http.StatusOK, "text/yaml", h.yamlData) } // UpdateYAML updates the graph from new YAML text. // @Summary Update dependency graph YAML // @Description Replaces the service dependency graph YAML and reloads it // @Tags graph // @Accept plain // @Produce json // @Param body body string true "New YAML content" // @Success 200 {object} map[string]string // @Failure 400 {object} map[string]string // @Security Bearer // @Router /graph [put] func (h *GraphHandlers) UpdateYAML(c *gin.Context) { body, err := io.ReadAll(c.Request.Body) if err != nil { c.JSON(http.StatusBadRequest, gin.H{"error": "failed to read body"}) return } g, err := graph.ParseYAML(body) if err != nil { c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()}) return } if err := os.WriteFile(h.path, body, 0o644); err != nil { c.JSON(http.StatusInternalServerError, gin.H{"error": "failed to write graph file"}) return } h.mu.Lock() h.yamlData = body h.loaded = g h.mu.Unlock() log.Printf("[graph] updated graph from admin, saved to %s", h.path) c.JSON(http.StatusOK, gin.H{"message": "graph updated"}) } // StartupOrder returns the computed service startup order. // @Summary Get startup order // @Description Returns the topologically sorted service startup order // @Tags graph // @Produce json // @Success 200 {array} string // @Failure 400 {object} map[string]string // @Security Bearer // @Router /graph/order [get] func (h *GraphHandlers) StartupOrder(c *gin.Context) { h.mu.RLock() g := h.loaded h.mu.RUnlock() order, err := g.TopologicalSort() if err != nil { c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()}) return } c.JSON(http.StatusOK, order) } // CycleCheck checks if the graph has cycles. // @Summary Check for cycles // @Description Returns whether the dependency graph contains cycles // @Tags graph // @Produce json // @Success 200 {object} map[string]bool // @Security Bearer // @Router /graph/cycle [get] func (h *GraphHandlers) CycleCheck(c *gin.Context) { h.mu.RLock() g := h.loaded h.mu.RUnlock() c.JSON(http.StatusOK, gin.H{"has_cycle": g.HasCycle()}) } // ServiceStatusOut represents a service and its current status. type ServiceStatusOut struct { NodeID string `json:"node_id"` Name string `json:"name"` Status string `json:"status"` Healthy bool `json:"healthy"` } // FailureRootCauseOut represents the result of a failure analysis. type FailureRootCauseOut struct { Affected ServiceStatusOut `json:"affected"` RootCause *ServiceStatusOut `json:"root_cause,omitempty"` DependencyChain []string `json:"dependency_chain,omitempty"` } // GetFailureRootCause analyzes the dependency graph and current service // statuses to find the root cause of a service failure. // If the specified service is unhealthy, it traverses its dependencies // to find the first unhealthy dependency — the one that is the root cause. // @Summary Find failure root cause // @Description Analyzes dependencies and service statuses to find the root cause of a failure // @Tags graph // @Param node_id query string false "Node ID (agent label)" // @Param service query string true "Service name" // @Produce json // @Success 200 {object} FailureRootCauseOut // @Failure 400 {object} map[string]string // @Security Bearer // @Router /graph/failure [get] func (h *GraphHandlers) GetFailureRootCause(c *gin.Context) { nodeID := c.Query("node_id") svcName := c.Query("service") if svcName == "" { c.JSON(http.StatusBadRequest, gin.H{"error": "service query param is required"}) return } h.mu.RLock() g := h.loaded h.mu.RUnlock() if g == nil { c.JSON(http.StatusBadRequest, gin.H{"error": "no graph loaded"}) return } // Build a map of service statuses from all agents svcStatus := h.buildServiceStatusMap() // If no node specified, search all nodes for the service if nodeID == "" { for _, node := range g.Nodes() { if _, ok := g.GetService(node.ID, svcName); ok { nodeID = node.ID break } } } if nodeID == "" { c.JSON(http.StatusNotFound, gin.H{"error": "service not found in graph"}) return } if _, ok := g.GetService(nodeID, svcName); !ok { c.JSON(http.StatusNotFound, gin.H{"error": "service not found in node"}) return } // Get current status status := svcStatus[nodeID+":"+svcName] affected := ServiceStatusOut{ NodeID: nodeID, Name: svcName, Status: status.status, Healthy: status.healthy, } // If the service is healthy, no failure to analyze if status.healthy { c.JSON(http.StatusOK, FailureRootCauseOut{ Affected: affected, }) return } // Find root cause: traverse dependencies to find the first unhealthy one rootCause, chain := findRootCause(g, nodeID, svcName, svcStatus) out := FailureRootCauseOut{ Affected: affected, DependencyChain: chain, } if rootCause != nil { out.RootCause = rootCause } c.JSON(http.StatusOK, out) } // svcStatusEntry holds parsed status info. type svcStatusEntry struct { status string healthy bool } // buildServiceStatusMap creates a map of "nodeID:serviceName" → status. // Matches graph nodes to agent labels in the collector. func (h *GraphHandlers) buildServiceStatusMap() map[string]svcStatusEntry { result := make(map[string]svcStatusEntry) h.mu.RLock() nodes := h.loaded.Nodes() h.mu.RUnlock() for _, agent := range h.collector.Agents() { for _, svc := range agent.Services { healthy := isHealthyStatus(svc.Status) entry := svcStatusEntry{status: svc.Status, healthy: healthy} // Try exact node match first key := agent.Label + ":" + svc.Name result[key] = entry // Also register under all nodes that don't have a status yet for _, node := range nodes { nodeKey := node.ID + ":" + svc.Name if _, exists := result[nodeKey]; !exists { result[nodeKey] = entry } } } } return result } // findRootCause traverses the dependency graph to find the first unhealthy dependency. func findRootCause(g *graph.Graph, nodeID, svcName string, statusMap map[string]svcStatusEntry) (*ServiceStatusOut, []string) { visited := make(map[string]bool) var chain []string var dfs func(string, string) *ServiceStatusOut dfs = func(nid, sname string) *ServiceStatusOut { key := nid + ":" + sname chain = append(chain, key) visited[key] = true svc, ok := g.GetService(nid, sname) if !ok { return nil } // Check each dependency for _, dep := range svc.Dependencies { depNodeID := dep.Target.NodeID if depNodeID == "" { depNodeID = nid } depKey := depNodeID + ":" + dep.Target.Name if visited[depKey] { continue // avoid loops } depStatus := statusMap[depKey] if !depStatus.healthy { // This dependency is unhealthy — check if IT has an unhealthy dependency // (to find the true root cause) if deeper := dfs(depNodeID, dep.Target.Name); deeper != nil { return deeper } // This is the root cause return &ServiceStatusOut{ NodeID: depNodeID, Name: dep.Target.Name, Status: depStatus.status, Healthy: false, } } } return nil } root := dfs(nodeID, svcName) // Deduplicate chain seen := make(map[string]bool) var deduped []string for _, k := range chain { if !seen[k] { seen[k] = true deduped = append(deduped, k) } } return root, deduped } func isHealthyStatus(status string) bool { s := strings.ToLower(status) return s == "running" || s == "up" || s == "healthy" }