chore: add system metrics
This commit is contained in:
@@ -0,0 +1,214 @@
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
)
|
||||
|
||||
// SystemMetrics holds current system resource usage.
|
||||
type SystemMetrics struct {
|
||||
CPUPercent float64
|
||||
MemoryPercent float64
|
||||
DiskPercent float64
|
||||
NetworkRxBytes float64
|
||||
NetworkTxBytes float64
|
||||
}
|
||||
|
||||
// Collector collects system metrics from /proc and sysfs.
|
||||
type Collector struct {
|
||||
lastCPUTotal uint64
|
||||
lastCPUIdle uint64
|
||||
lastNetRx float64
|
||||
lastNetTx float64
|
||||
lastNetTime time.Time
|
||||
}
|
||||
|
||||
// NewCollector creates a new metrics collector.
|
||||
func NewCollector() *Collector {
|
||||
return &Collector{}
|
||||
}
|
||||
|
||||
// Collect gathers current system metrics.
|
||||
func (c *Collector) Collect() (SystemMetrics, error) {
|
||||
var m SystemMetrics
|
||||
|
||||
cpu, err := c.readCPU()
|
||||
if err == nil {
|
||||
m.CPUPercent = cpu
|
||||
}
|
||||
|
||||
mem, err := c.readMemory()
|
||||
if err == nil {
|
||||
m.MemoryPercent = mem
|
||||
}
|
||||
|
||||
disk, err := c.readDisk("/")
|
||||
if err == nil {
|
||||
m.DiskPercent = disk
|
||||
}
|
||||
|
||||
netRx, netTx, err := c.readNetwork()
|
||||
if err == nil {
|
||||
m.NetworkRxBytes = netRx
|
||||
m.NetworkTxBytes = netTx
|
||||
}
|
||||
|
||||
return m, nil
|
||||
}
|
||||
|
||||
// readCPU returns CPU usage percentage since last call.
|
||||
func (c *Collector) readCPU() (float64, error) {
|
||||
f, err := os.Open("/proc/stat")
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
scanner := bufio.NewScanner(f)
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
if !strings.HasPrefix(line, "cpu ") {
|
||||
continue
|
||||
}
|
||||
|
||||
fields := strings.Fields(line)
|
||||
if len(fields) < 8 {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
var user, nice, system, idle, iowait, irq, softirq uint64
|
||||
user, _ = strconv.ParseUint(fields[1], 10, 64)
|
||||
nice, _ = strconv.ParseUint(fields[2], 10, 64)
|
||||
system, _ = strconv.ParseUint(fields[3], 10, 64)
|
||||
idle, _ = strconv.ParseUint(fields[4], 10, 64)
|
||||
iowait, _ = strconv.ParseUint(fields[5], 10, 64)
|
||||
irq, _ = strconv.ParseUint(fields[6], 10, 64)
|
||||
softirq, _ = strconv.ParseUint(fields[7], 10, 64)
|
||||
|
||||
total := user + nice + system + idle + iowait + irq + softirq
|
||||
idleTotal := idle + iowait
|
||||
|
||||
if c.lastCPUTotal > 0 {
|
||||
totalDiff := total - c.lastCPUTotal
|
||||
idleDiff := idleTotal - c.lastCPUIdle
|
||||
|
||||
if totalDiff > 0 {
|
||||
cpuPercent := float64(totalDiff-idleDiff) / float64(totalDiff) * 100.0
|
||||
c.lastCPUTotal = total
|
||||
c.lastCPUIdle = idleTotal
|
||||
return cpuPercent, nil
|
||||
}
|
||||
}
|
||||
|
||||
c.lastCPUTotal = total
|
||||
c.lastCPUIdle = idleTotal
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
return 0, scanner.Err()
|
||||
}
|
||||
|
||||
// readMemory returns RAM usage percentage.
|
||||
func (c *Collector) readMemory() (float64, error) {
|
||||
f, err := os.Open("/proc/meminfo")
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
var total, available uint64
|
||||
scanner := bufio.NewScanner(f)
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
if strings.HasPrefix(line, "MemTotal:") {
|
||||
fields := strings.Fields(line)
|
||||
total, _ = strconv.ParseUint(fields[1], 10, 64)
|
||||
} else if strings.HasPrefix(line, "MemAvailable:") {
|
||||
fields := strings.Fields(line)
|
||||
available, _ = strconv.ParseUint(fields[1], 10, 64)
|
||||
}
|
||||
}
|
||||
|
||||
if total == 0 {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
used := total - available
|
||||
return float64(used) / float64(total) * 100.0, nil
|
||||
}
|
||||
|
||||
// readDisk returns disk usage percentage for the given path.
|
||||
func (c *Collector) readDisk(path string) (float64, error) {
|
||||
var stat syscall.Statfs_t
|
||||
if err := syscall.Statfs(path, &stat); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
total := stat.Blocks * uint64(stat.Bsize)
|
||||
free := stat.Bfree * uint64(stat.Bsize)
|
||||
|
||||
if total == 0 {
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
used := total - free
|
||||
return float64(used) / float64(total) * 100.0, nil
|
||||
}
|
||||
|
||||
// readNetwork returns network RX/TX bytes per second.
|
||||
func (c *Collector) readNetwork() (float64, float64, error) {
|
||||
f, err := os.Open("/proc/net/dev")
|
||||
if err != nil {
|
||||
return 0, 0, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
var totalRx, totalTx uint64
|
||||
scanner := bufio.NewScanner(f)
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
// Skip header lines
|
||||
if strings.Contains(line, "|") || strings.HasPrefix(strings.TrimSpace(line), "Inter") {
|
||||
continue
|
||||
}
|
||||
|
||||
parts := strings.SplitN(strings.TrimSpace(line), ":", 2)
|
||||
if len(parts) < 2 {
|
||||
continue
|
||||
}
|
||||
|
||||
fields := strings.Fields(parts[1])
|
||||
if len(fields) < 9 {
|
||||
continue
|
||||
}
|
||||
|
||||
rx, _ := strconv.ParseUint(fields[0], 10, 64)
|
||||
tx, _ := strconv.ParseUint(fields[8], 10, 64)
|
||||
totalRx += rx
|
||||
totalTx += tx
|
||||
}
|
||||
|
||||
now := time.Now()
|
||||
var rxRate, txRate float64
|
||||
|
||||
if !c.lastNetTime.IsZero() {
|
||||
elapsed := now.Sub(c.lastNetTime).Seconds()
|
||||
if elapsed > 0 {
|
||||
rxRate = float64(totalRx) - c.lastNetRx
|
||||
txRate = float64(totalTx) - c.lastNetTx
|
||||
// Convert to bytes per second
|
||||
rxRate = rxRate / elapsed
|
||||
txRate = txRate / elapsed
|
||||
}
|
||||
}
|
||||
|
||||
c.lastNetRx = float64(totalRx)
|
||||
c.lastNetTx = float64(totalTx)
|
||||
c.lastNetTime = now
|
||||
|
||||
return rxRate, txRate, nil
|
||||
}
|
||||
@@ -12,6 +12,7 @@ import (
|
||||
"gitea.d3m0k1d.ru/d3m0k1d/HellreigN/agent/internal/client"
|
||||
"gitea.d3m0k1d.ru/d3m0k1d/HellreigN/agent/internal/commander"
|
||||
"gitea.d3m0k1d.ru/d3m0k1d/HellreigN/agent/internal/config"
|
||||
agentmetrics "gitea.d3m0k1d.ru/d3m0k1d/HellreigN/agent/internal/metrics"
|
||||
"gitea.d3m0k1d.ru/d3m0k1d/HellreigN/agent/internal/logger"
|
||||
"gitea.d3m0k1d.ru/d3m0k1d/HellreigN/agent/internal/logsource"
|
||||
"gitea.d3m0k1d.ru/d3m0k1d/HellreigN/agent/internal/logsource/docker"
|
||||
@@ -120,6 +121,11 @@ func main() {
|
||||
})
|
||||
}
|
||||
|
||||
// Start system metrics reporting
|
||||
wg.Go(func() error {
|
||||
return reportSystemMetrics(ctx, grpcAddr, creds, cfg.Label, lgr)
|
||||
})
|
||||
|
||||
// Start log collectors
|
||||
if len(cfg.Services) > 0 {
|
||||
wg.Go(func() error {
|
||||
@@ -370,3 +376,59 @@ func reportServices(
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// reportSystemMetrics periodically collects and sends system metrics to the backend via gRPC.
|
||||
func reportSystemMetrics(
|
||||
ctx context.Context,
|
||||
grpcAddr string,
|
||||
creds credentials.TransportCredentials,
|
||||
label string,
|
||||
lgr *logger.Logger,
|
||||
) error {
|
||||
conn, err := grpc.NewClient(grpcAddr, grpc.WithTransportCredentials(creds))
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to connect for metrics report: %w", err)
|
||||
}
|
||||
defer conn.Close()
|
||||
|
||||
ccli := proto.NewCollectorClient(conn)
|
||||
collector := agentmetrics.NewCollector()
|
||||
ticker := time.NewTicker(5 * time.Second)
|
||||
defer ticker.Stop()
|
||||
|
||||
lgr.Info("System metrics collector started")
|
||||
|
||||
for {
|
||||
metrics, err := collector.Collect()
|
||||
if err != nil {
|
||||
lgr.Warn("Failed to collect system metrics", "err", err)
|
||||
} else {
|
||||
md := metadata.New(map[string]string{"whoami": label})
|
||||
_, err := ccli.ReportSystemMetrics(
|
||||
metadata.NewOutgoingContext(ctx, md),
|
||||
&proto.SystemMetrics{
|
||||
CpuPercent: metrics.CPUPercent,
|
||||
MemoryPercent: metrics.MemoryPercent,
|
||||
DiskPercent: metrics.DiskPercent,
|
||||
NetworkRxBytes: metrics.NetworkRxBytes,
|
||||
NetworkTxBytes: metrics.NetworkTxBytes,
|
||||
},
|
||||
)
|
||||
if err != nil {
|
||||
lgr.Warn("Failed to report system metrics", "err", err)
|
||||
} else {
|
||||
lgr.Debug("System metrics reported",
|
||||
"cpu", metrics.CPUPercent,
|
||||
"mem", metrics.MemoryPercent,
|
||||
"disk", metrics.DiskPercent,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
case <-ticker.C:
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user