fix: Update agent install, disk metrics, fallback blocks, and dynamic thresholds
BREAKING: install.sh now downloads the agent from the server instead of embedding it.

Changes:
- AgentController: add downloadAgent() method for serving agent.py with token auth
- AgentController: rewrite generateInstallScript() to curl the agent from the server
- agent.py: copy the production version from the server (with temperature, disk and network metrics)
- agent.py: fix get_disk_metrics() to use priority mountpoints (/, /home, etc.)
- agent.py: fix disk_total_gb collection to use priority mountpoints
- detail.twig: add fallback blocks for temperatures (alert-info)
- detail.twig: add fallback blocks for disk doughnuts (alert-warning)
- detail.twig: add fallback blocks for network graphs (alert-warning)
- detail.twig: add a null check for ramTotalGB in the tooltip
- detail.twig: improve the thresholds form with human-readable labels and units
- ServerDetailController: query only the metrics that exist on the server and are displayed on graphs

For server 3 (mirv.top):
- After deploy, download the new install.sh and reinstall the agent
- This will add disk_used_root, ram_total_gb and temperatures support
This commit is contained in:
parent
3255bfae29
commit
bce4c2e2d0
Binary file not shown.
302
agent.py
302
agent.py
|
|
@ -8,11 +8,119 @@ import subprocess
|
|||
import os
|
||||
from datetime import datetime
|
||||
|
||||
# Скипаем виртуальные и служебные интерфейсы
|
||||
SKIP_INTERFACE_PREFIXES = ('lo', 'docker', 'veth', 'br-', 'tun', 'tap', 'wg', 'virbr', 'vmnet', 'vmxnet')
|
||||
|
||||
# Храним предыдущие значения net_io для расчёта дельты
|
||||
_prev_net_io = {}
|
||||
|
||||
|
||||
def _is_real_interface(name, stats):
|
||||
for prefix in SKIP_INTERFACE_PREFIXES:
|
||||
if name.startswith(prefix):
|
||||
return False
|
||||
if not stats.isup:
|
||||
return False
|
||||
if stats.speed <= 0:
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def get_network_metrics(interval=60):
    """Measure per-interface bandwidth use as a percentage of link speed.

    Each call compares the current byte counters with the sample stored in the
    module-level ``_prev_net_io`` cache by the previous call, so the first call
    for an interface only seeds the cache and reports nothing for it.

    Args:
        interval: Unused; the elapsed time is measured from the stored sample
            timestamps. Kept so existing callers keep working.

    Returns:
        dict: ``net_in_<iface>`` / ``net_out_<iface>`` mapped to a percentage
        in the range 0..100 rounded to two decimals. ``-`` in interface names
        is replaced with ``_``. Empty when nothing could be measured.
    """
    import time  # function-scope import instead of the inline __import__ call

    metrics = {}
    try:
        counters = psutil.net_io_counters(pernic=True)
        stats = psutil.net_if_stats()
        now = time.time()

        for name, counter in counters.items():
            if name not in stats:
                continue
            iface_stats = stats[name]
            if not _is_real_interface(name, iface_stats):
                continue

            # psutil reports the link speed in Mbit/s; convert it to bytes per
            # second so it is comparable with the byte counters.
            link_bytes_per_sec = iface_stats.speed * 1000000 / 8

            prev = _prev_net_io.get(name)
            if prev is not None:
                elapsed = now - prev['time']
                if elapsed > 0:
                    # Counters restart from zero when an interface is
                    # re-created or the counter wraps; clamp so a reset never
                    # turns into a negative utilisation value.
                    rx_delta = max(counter.bytes_recv - prev['rx'], 0)
                    tx_delta = max(counter.bytes_sent - prev['tx'], 0)
                    rx_pct = min((rx_delta / elapsed) / link_bytes_per_sec * 100, 100.0)
                    tx_pct = min((tx_delta / elapsed) / link_bytes_per_sec * 100, 100.0)
                    iface_key = name.replace('-', '_')
                    metrics[f'net_in_{iface_key}'] = round(rx_pct, 2)
                    metrics[f'net_out_{iface_key}'] = round(tx_pct, 2)

            # Always remember the latest sample, including the very first one.
            _prev_net_io[name] = {
                'rx': counter.bytes_recv,
                'tx': counter.bytes_sent,
                'time': now,
            }
    except Exception as e:
        # Best-effort collection: a failure here must not stop the agent.
        print(f'Ошибка сбора сетевых метрик: {e}')
    return metrics
|
||||
|
||||
|
||||
def _is_real_partition(mountpoint, fstype):
|
||||
"""Проверяем что раздел реальный (не tmpfs, docker, snap и т.д.)"""
|
||||
skip_fstypes = {'tmpfs', 'devtmpfs', 'overlay', 'squashfs', 'snap',
|
||||
'devpts', 'proc', 'sysfs', 'cgroup', 'cgroup2',
|
||||
'pstore', 'hugetlbfs', 'mqueue', 'debugfs',
|
||||
'tracefs', 'bpf', 'fusectl', 'configfs',
|
||||
'securityfs', 'ramfs'}
|
||||
skip_mounts = {'/run', '/run/lock', '/sys', '/proc', '/dev',
|
||||
'/dev/shm', '/dev/pts', '/sys/fs/cgroup'}
|
||||
|
||||
if fstype in skip_fstypes:
|
||||
return False
|
||||
if mountpoint in skip_mounts:
|
||||
return False
|
||||
# Пропускаем EFI — слишком маленький, не информативен
|
||||
if mountpoint == '/boot/efi':
|
||||
return False
|
||||
return True
|
||||
|
||||
|
||||
def get_disk_metrics():
    """Collect disk usage percentages for mounted partitions.

    A fixed list of well-known paths is probed first, then every partition
    returned by ``psutil.disk_partitions(all=False)`` that passes
    ``_is_real_partition`` and has not been reported yet.

    Returns:
        dict: ``disk_used_<name>`` mapped to the usage percentage rounded to
        one decimal, where ``<name>`` is the mount path with ``/`` replaced by
        ``_`` (``root`` for ``/``). ``disk_used`` holds the aggregate
        percentage over the probed well-known paths and is only present when
        at least one of them could be read.
    """
    metrics = {}
    total_used = 0
    total_capacity = 0
    # Filesystems already added to the aggregate, identified by device id.
    counted_devices = set()

    priority_mounts = ['/', '/home', '/boot', '/var', '/opt', '/data', '/mnt', '/srv', '/tmp']

    for mountpoint in priority_mounts:
        try:
            usage = psutil.disk_usage(mountpoint)
            device_id = os.stat(mountpoint).st_dev
        except OSError:
            # Covers missing paths and permission problems (both are OSError
            # subclasses); the path is simply not reported.
            continue

        name = mountpoint.strip('/').replace('/', '_') or 'root'
        metrics[f'disk_used_{name}'] = round(usage.percent, 1)

        # disk_usage() reports the filesystem that contains the path, so a
        # plain directory such as /var on the root filesystem returns the
        # root numbers again. Count every filesystem only once in the total.
        if device_id not in counted_devices:
            counted_devices.add(device_id)
            total_used += usage.used
            total_capacity += usage.total

    for part in psutil.disk_partitions(all=False):
        name = part.mountpoint.strip('/').replace('/', '_') or 'root'
        key = f'disk_used_{name}'
        # Compare against the real metric key; the bare name is never a key.
        if key in metrics:
            continue
        if not _is_real_partition(part.mountpoint, part.fstype):
            continue
        try:
            usage = psutil.disk_usage(part.mountpoint)
        except OSError:
            continue
        metrics[key] = round(usage.percent, 1)

    if total_capacity > 0:
        metrics['disk_used'] = round((total_used / total_capacity) * 100, 1)

    return metrics
|
||||
|
||||
|
||||
def get_metrics():
|
||||
"""Сбор системных метрик"""
|
||||
cpu_percent = psutil.cpu_percent(interval=1)
|
||||
memory = psutil.virtual_memory()
|
||||
disk_usage = psutil.disk_usage('/')
|
||||
|
||||
# Дисковые метрики для всех реальных разделов
|
||||
disk_metrics = get_disk_metrics()
|
||||
|
||||
# Получаем сетевую статистику
|
||||
try:
|
||||
|
|
@ -20,26 +128,70 @@ def get_metrics():
|
|||
except:
|
||||
net_io = None
|
||||
|
||||
return {
|
||||
result = {
|
||||
'cpu_load': cpu_percent,
|
||||
'ram_used': memory.percent,
|
||||
'disk_used': disk_usage.percent
|
||||
}
|
||||
result.update(disk_metrics)
|
||||
|
||||
# Метрики использования сети
|
||||
net_metrics = get_network_metrics()
|
||||
result.update(net_metrics)
|
||||
# RAM total GB
|
||||
result["ram_total_gb"] = round(memory.total / (1024**3), 1)
|
||||
|
||||
# Disk total GB - сначала приоритетные mountpoints
|
||||
priority_mounts = ['/', '/home', '/boot', '/var', '/opt', '/data', '/mnt', '/srv', '/tmp']
|
||||
for mountpoint in priority_mounts:
|
||||
try:
|
||||
usage = psutil.disk_usage(mountpoint)
|
||||
name = mountpoint.strip("/").replace("/", "_") or "root"
|
||||
if f"disk_total_gb_{name}" not in result:
|
||||
result[f"disk_total_gb_{name}"] = round(usage.total / (1024**3), 1)
|
||||
except (PermissionError, OSError, FileNotFoundError):
|
||||
pass
|
||||
|
||||
for part in psutil.disk_partitions(all=False):
|
||||
try:
|
||||
usage = psutil.disk_usage(part.mountpoint)
|
||||
name = part.mountpoint.strip("/").replace("/", "_") or "root"
|
||||
if f"disk_total_gb_{name}" not in result:
|
||||
result[f"disk_total_gb_{name}"] = round(usage.total / (1024**3), 1)
|
||||
except (PermissionError, OSError):
|
||||
pass
|
||||
|
||||
if net_metrics:
|
||||
print(f" Сетевые метрики: {net_metrics}")
|
||||
|
||||
# Сетевые метрики
|
||||
if net_io:
|
||||
result['network_rx'] = round(net_io.bytes_recv / (1024 * 1024), 2)
|
||||
result['network_tx'] = round(net_io.bytes_sent / (1024 * 1024), 2)
|
||||
|
||||
return result
|
||||
|
||||
def get_top_processes(process_type='cpu'):
|
||||
"""Сбор топ-5 процессов по CPU или RAM"""
|
||||
processes = []
|
||||
|
||||
try:
|
||||
for proc in psutil.process_iter(['pid', 'name', 'cpu_percent', 'memory_percent']):
|
||||
for proc in psutil.process_iter(['pid', 'name', 'cpu_percent', 'memory_percent', 'cmdline']):
|
||||
try:
|
||||
info = proc.info
|
||||
if info['cpu_percent'] is None or info['memory_percent'] is None:
|
||||
continue
|
||||
|
||||
cmdline = info.get('cmdline') or []
|
||||
if cmdline:
|
||||
full_cmd = ' '.join(cmdline)
|
||||
cmd_display = full_cmd[:120] + ('...' if len(full_cmd) > 120 else '')
|
||||
else:
|
||||
cmd_display = info.get('name', '')
|
||||
|
||||
processes.append({
|
||||
'pid': info['pid'],
|
||||
'name': info['name'],
|
||||
'cmdline': cmd_display,
|
||||
'value': round(info[process_type + '_percent'], 1)
|
||||
})
|
||||
except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
|
||||
|
|
@ -61,45 +213,139 @@ def get_top_processes(process_type='cpu'):
|
|||
return []
|
||||
|
||||
def get_services():
    """Collect every systemd service together with its current state.

    ``systemctl list-unit-files`` provides the full set of installed service
    units; ``systemctl list-units --all`` provides the state of the units that
    are currently loaded. Units present only in the first list are reported as
    ``loaded`` / ``inactive`` / ``dead``.

    Returns:
        list[dict]: One dict per service with the keys ``name``, ``status``
        (``running``, ``stopped`` or ``unknown``), ``load_state``,
        ``active_state`` and ``sub_state``. An empty list when systemctl
        cannot be run or its output cannot be processed.
    """
    try:
        # 1. Every installed service unit, including ones that are not loaded.
        res_files = subprocess.run(
            ['systemctl', 'list-unit-files', '--type=service', '--no-pager'],
            capture_output=True, text=True, timeout=10)

        # 2. Current state of the loaded service units.
        res_units = subprocess.run(
            ['systemctl', 'list-units', '--type=service', '--all', '--no-pager'],
            capture_output=True, text=True, timeout=10)

        # Unit names in listing order; a dict keeps the order and drops
        # duplicates.
        unit_names = {}
        for line in res_files.stdout.split('\n'):
            parts = line.split()
            if parts and parts[0].endswith('.service'):
                unit_names[parts[0]] = None

        running_states = {}
        for line in res_units.stdout.split('\n'):
            parts = line.split()
            # systemctl prefixes some rows (e.g. failed or not-found units)
            # with a one-character status glyph. Drop it so those units are
            # parsed instead of being silently reported as inactive/dead.
            if parts and len(parts[0]) == 1 and not parts[0].isalnum():
                parts = parts[1:]
            if len(parts) >= 4 and parts[0].endswith('.service'):
                running_states[parts[0]] = {
                    'load_state': parts[1],
                    'active_state': parts[2],
                    'sub_state': parts[3],
                }

        services = []
        for svc_name in unit_names:
            state = running_states.get(svc_name)
            if state is not None:
                load = state['load_state']
                active = state['active_state']
                sub = state['sub_state']
            else:
                # The unit file exists but the unit is not loaded.
                load = 'loaded'
                active = 'inactive'
                sub = 'dead'

            if active == 'active':
                status = 'running'
            elif active in ('inactive', 'failed', 'deactivating'):
                status = 'stopped'
            else:
                status = 'unknown'

            services.append({
                'name': svc_name,
                'status': status,
                'load_state': load,
                'active_state': active,
                'sub_state': sub,
            })

        return services

    except Exception as e:
        # Best-effort: a missing systemctl or a timeout must not stop the agent.
        print(f"Ошибка получения списка сервисов: {e}")
        return []
|
||||
|
||||
|
||||
def get_temperatures():
    """Collect CPU, disk and GPU temperatures on a best-effort basis.

    Every source is optional: a missing tool or sensor only leaves its keys
    out of the result.

    Returns:
        dict: ``temp_cpu`` (highest reading from psutil sensors),
        ``temp_disk_<device>`` (parsed from ``smartctl -A`` output) and
        ``temp_gpu`` for a single GPU or ``temp_gpu_<index>`` for several
        (from ``nvidia-smi``).
    """
    import glob

    temps = {}

    # 1. CPU via psutil: prefer the known CPU sensor chips, otherwise fall
    #    back to the hottest reading of any available sensor.
    try:
        sensors = psutil.sensors_temperatures()
        if sensors:
            cpu_temps = [
                entry.current
                for chip, entries in sensors.items()
                if chip.lower() in ('coretemp', 'k10temp', 'zenpower')
                for entry in entries
                if entry.current
            ]
            if not cpu_temps:
                cpu_temps = [
                    entry.current
                    for entries in sensors.values()
                    for entry in entries
                    if entry.current
                ]
            if cpu_temps:
                temps['temp_cpu'] = max(cpu_temps)
    except Exception:
        # Deliberately broad: sensor support depends on the platform and any
        # failure here simply means "no CPU temperature".
        pass

    # 2. Disks via smartctl. Errors are handled per disk so that one failing
    #    or slow device does not hide the readings of the remaining ones.
    disks = glob.glob('/dev/sd[a-z]') + glob.glob('/dev/nvme[0-9]n1')
    for disk in disks:
        try:
            res = subprocess.run(['smartctl', '-n', 'standby', '-A', disk],
                                 capture_output=True, text=True, timeout=5)
        except FileNotFoundError:
            # smartctl is not installed; trying the other disks is pointless.
            break
        except (OSError, subprocess.SubprocessError):
            # E.g. a timeout on this disk only.
            continue

        if res.returncode != 0 or 'STANDBY' in res.stdout.upper():
            continue

        disk_name = disk.split('/')[-1]
        for line in res.stdout.split('\n'):
            if 'Temperature' not in line:
                continue
            # Scan the fields right-to-left for the first integer strictly
            # between 10 and 100 and treat it as the reading.
            for field in reversed(line.split()):
                try:
                    value = int(field)
                except ValueError:
                    continue
                if 10 < value < 100:
                    temps[f'temp_disk_{disk_name}'] = float(value)
                    break

    # 3. GPU via nvidia-smi: one line of output per GPU.
    try:
        res = subprocess.run(['nvidia-smi', '--query-gpu=temperature.gpu', '--format=csv,noheader'],
                             capture_output=True, text=True, timeout=5)
    except (OSError, subprocess.SubprocessError):
        res = None

    if res is not None and res.returncode == 0:
        readings = res.stdout.strip().split('\n')
        single_gpu = len(readings) == 1
        for index, raw in enumerate(readings):
            try:
                value = float(raw)
            except ValueError:
                continue
            temps['temp_gpu' if single_gpu else f'temp_gpu_{index}'] = value

    return temps
|
||||
|
||||
def send_metrics():
|
||||
"""Отправка метрик на сервер"""
|
||||
with open('/opt/server-monitor-agent/config.json', 'r') as f:
|
||||
|
|
@ -110,6 +356,8 @@ def send_metrics():
|
|||
|
||||
# Собираем метрики
|
||||
metrics = get_metrics()
|
||||
temps = get_temperatures()
|
||||
metrics.update(temps)
|
||||
|
||||
# Собираем топ-процессы
|
||||
top_cpu = get_top_processes('cpu')
|
||||
|
|
|
|||
|
|
@ -0,0 +1,17 @@
|
|||
-- 008: Авто-очистка старых метрик (старше 60 дней)
|
||||
-- Запускается автоматически каждый день в 03:00
|
||||
|
||||
-- Создаём событие очистки (работает от mon_user если даны права EVENT)
|
||||
-- Если mon_user не может создать событие — запустите вручную от root:
|
||||
-- CREATE EVENT ... (см. ниже)
|
||||
--
|
||||
-- Для Docker: event_scheduler включается через docker-compose command
|
||||
-- Для ручной установки: добавьте event_scheduler=ON в /etc/mysql/mariadb.conf.d/
|
||||
|
||||
-- Если есть привилегии — создаём событие:
|
||||
CREATE EVENT IF NOT EXISTS daily_metrics_cleanup
|
||||
ON SCHEDULE EVERY 1 DAY
|
||||
STARTS CURRENT_DATE + INTERVAL 1 DAY + INTERVAL 3 HOUR
|
||||
ON COMPLETION PRESERVE
|
||||
DO
|
||||
DELETE FROM server_metrics WHERE created_at < NOW() - INTERVAL 60 DAY;
|
||||
|
|
@ -233,6 +233,7 @@ $app->get('/api/status', function (Request $request, Response $response, $args)
|
|||
$app->get('/agent/install.sh', [$agentController, 'generateInstallScript']);
|
||||
$app->get('/agent/install.ps1', [$agentController, 'generateWindowsInstallScript']);
|
||||
$app->get('/agent/install.bat', [$agentController, 'generateWindowsBatScript']);
|
||||
$app->get('/agent/agent.py', [$agentController, 'downloadAgent']);
|
||||
|
||||
// Run app
|
||||
$app->run();
|
||||
|
|
@ -17,7 +17,6 @@ class AgentController extends Model
|
|||
$token = $queryParams['token'] ?? null;
|
||||
$server_id = $queryParams['server_id'] ?? null;
|
||||
|
||||
// Если передан server_id, получаем оригинальный токен из зашифрованного
|
||||
if (!empty($server_id) && empty($token)) {
|
||||
$stmt = $this->pdo->prepare("SELECT encrypted_token FROM agent_tokens WHERE server_id = :server_id LIMIT 1");
|
||||
$stmt->execute([':server_id' => $server_id]);
|
||||
|
|
@ -34,250 +33,73 @@ class AgentController extends Model
|
|||
}
|
||||
|
||||
$apiUrl = 'https://mon.mirv.top/api/v1/metrics';
|
||||
$agentDownloadUrl = 'https://mon.mirv.top/agent/agent.py?token=' . $token;
|
||||
|
||||
// Формируем скрипт с прямой подстановкой значений
|
||||
$script = "#!/bin/bash
|
||||
$script = <<<BASH
|
||||
#!/bin/bash
|
||||
|
||||
# Скрипт установки агента мониторинга с поддержкой сервисов
|
||||
# =====================================================
|
||||
# Скрипт установки агента мониторинга
|
||||
# Сгенерировано автоматически
|
||||
# =====================================================
|
||||
|
||||
TOKEN='" . $token . "'
|
||||
API_URL='" . $apiUrl . "'
|
||||
set -e
|
||||
|
||||
echo 'Установка агента мониторинга...'
|
||||
TOKEN='{$token}'
|
||||
API_URL='{$apiUrl}'
|
||||
AGENT_URL='{$agentDownloadUrl}'
|
||||
INSTALL_DIR='/opt/server-monitor-agent'
|
||||
|
||||
echo '=============================================='
|
||||
echo ' Установка агента мониторинга серверов'
|
||||
echo '=============================================='
|
||||
echo ''
|
||||
|
||||
# Проверяем наличие Python3
|
||||
if ! command -v python3 &> /dev/null; then
|
||||
echo 'Установка Python3...'
|
||||
apt-get update
|
||||
apt-get install -y python3 python3-pip lm-sensors smartmontools
|
||||
echo '[1/6] Установка Python3...'
|
||||
apt-get update -qq
|
||||
apt-get install -y -qq python3 python3-pip || apt-get install -y python3 python3-pip
|
||||
else
|
||||
echo '[1/6] Python3 найден'
|
||||
fi
|
||||
|
||||
# Устанавливаем psutil
|
||||
pip3 install psutil || easy_install3 psutil
|
||||
# Устанавливаем зависимости (lm-sensors и smartmontools опциональны)
|
||||
echo '[2/6] Установка зависимостей (psutil, lm-sensors, smartmontools)...'
|
||||
pip3 install --quiet psutil 2>/dev/null || pip3 install psutil 2>/dev/null || true
|
||||
apt-get install -y -qq lm-sensors smartmontools 2>/dev/null || true
|
||||
|
||||
# Создаем директорию для агента
|
||||
mkdir -p /opt/server-monitor-agent
|
||||
cd /opt/server-monitor-agent
|
||||
echo '[3/6] Создание директории агента...'
|
||||
mkdir -p "$INSTALL_DIR"
|
||||
|
||||
# Скачиваем агента
|
||||
echo '[4/6] Скачивание агента...'
|
||||
if ! curl -fsSL "$AGENT_URL" -o "$INSTALL_DIR/agent.py" 2>/dev/null; then
|
||||
echo 'ERROR: Не удалось скачать агента. Проверьте токен и подключение к серверу.'
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if ! grep -q 'psutil' "$INSTALL_DIR/agent.py"; then
|
||||
echo 'ERROR: Скачанный файл не является агентом мониторинга.'
|
||||
exit 1
|
||||
fi
|
||||
|
||||
chmod +x "$INSTALL_DIR/agent.py"
|
||||
|
||||
# Создаем конфигурационный файл
|
||||
echo '{
|
||||
\\\"token\\\": \\\"" . $token . "\\\"\\,
|
||||
\\\"api_url\\\": \\\"" . $apiUrl . "\\\"\\,
|
||||
\\\"interval_seconds\\\": 60
|
||||
}' > config.json
|
||||
|
||||
# Создаем Python-скрипт агента с поддержкой сервисов
|
||||
cat > agent.py << 'PYTHON_EOF'
|
||||
import time
|
||||
import json
|
||||
import psutil
|
||||
import requests
|
||||
import subprocess
|
||||
import os
|
||||
from datetime import datetime
|
||||
|
||||
def get_metrics():
|
||||
\\\"\\\"\\\"Сбор системных метрик\\\"\\\"\\\"
|
||||
cpu_percent = psutil.cpu_percent(interval=1)
|
||||
memory = psutil.virtual_memory()
|
||||
disk_usage = psutil.disk_usage('/')
|
||||
|
||||
# Получаем сетевую статистику
|
||||
try:
|
||||
net_io = psutil.net_io_counters()
|
||||
except:
|
||||
net_io = None
|
||||
|
||||
metrics = {
|
||||
'cpu_load': round(cpu_percent, 2),
|
||||
'ram_used': round(memory.percent, 2),
|
||||
'disk_used': round((disk_usage.used / disk_usage.total) * 100, 2),
|
||||
'network_in': round((net_io.bytes_recv / (1024*1024)) if net_io else 0, 2), # MB
|
||||
'network_out': round((net_io.bytes_sent / (1024*1024)) if net_io else 0, 2) # MB
|
||||
echo '[5/6] Создание конфигурации...'
|
||||
cat > "$INSTALL_DIR/config.json" << CONFIG_EOF
|
||||
{
|
||||
"token": "$TOKEN",
|
||||
"api_url": "$API_URL",
|
||||
"interval_seconds": 60
|
||||
}
|
||||
|
||||
return metrics
|
||||
|
||||
def get_services():
|
||||
\\\"\\\"\\\"Сбор статусов всех сервисов\\\"\\\"\\\"
|
||||
services = []
|
||||
|
||||
try:
|
||||
# Получаем список всех сервисов
|
||||
result = subprocess.run(
|
||||
['systemctl', 'list-units', '--type=service', '--all', '--no-pager'],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=30
|
||||
)
|
||||
|
||||
lines = result.stdout.strip().split('\\n')
|
||||
|
||||
for line in lines[1:]: # Пропускаем заголовок
|
||||
parts = line.split()
|
||||
if len(parts) >= 4:
|
||||
service_name = parts[0].replace('.service', '')
|
||||
load_state = parts[1]
|
||||
active_state = parts[2]
|
||||
sub_state = parts[3] if len(parts) > 3 else ''
|
||||
|
||||
# Определяем статус сервиса
|
||||
if active_state == 'active' and sub_state == 'running':
|
||||
status = 'running'
|
||||
elif active_state in ['inactive', 'failed', 'dead']:
|
||||
status = 'stopped'
|
||||
else:
|
||||
status = 'unknown'
|
||||
|
||||
# Пропускаем системные сервисы без .service в имени
|
||||
if not service_name.startswith('system-'):
|
||||
services.append({
|
||||
'name': service_name,
|
||||
'status': status,
|
||||
'load_state': load_state,
|
||||
'active_state': active_state,
|
||||
'sub_state': sub_state
|
||||
})
|
||||
|
||||
except Exception as e:
|
||||
print(f'Ошибка при получении списка сервисов: {e}')
|
||||
|
||||
return services
|
||||
|
||||
def get_config_from_server():
|
||||
\\\"\\\"\\\"Получение конфигурации с сервера\\\"\\\"\\\"
|
||||
try:
|
||||
with open('config.json', 'r') as f:
|
||||
config = json.load(f)
|
||||
except Exception as e:
|
||||
print(f'Ошибка чтения конфига: {e}')
|
||||
return None
|
||||
|
||||
token = config.get('token')
|
||||
if not token:
|
||||
print('Отсутствует токен в конфиге')
|
||||
return None
|
||||
|
||||
# Определяем URL для получения конфигурации
|
||||
server_id = token.split('-')[0] if '-' in token else '1'
|
||||
|
||||
try:
|
||||
response = requests.get(
|
||||
f\\\"\\\"{config['api_url']}/agent/{server_id}/config\\\"\\\"\\\",
|
||||
headers={'Authorization': f'Bearer {token}'},
|
||||
timeout=10
|
||||
)
|
||||
|
||||
if response.status_code == 200:
|
||||
server_config = response.json()
|
||||
|
||||
# Обновляем локальный конфиг
|
||||
config['interval_seconds'] = server_config.get('interval_seconds', config['interval_seconds'])
|
||||
config['monitor_services'] = server_config.get('monitor_services', config.get('monitor_services', []))
|
||||
|
||||
# Сохраняем обновленный конфиг
|
||||
with open('config.json', 'w') as f:
|
||||
json.dump(config, f, indent=2)
|
||||
|
||||
return config
|
||||
else:
|
||||
print(f'Ошибка получения конфига с сервера: {response.status_code}')
|
||||
return config
|
||||
|
||||
except Exception as e:
|
||||
print(f'Ошибка подключения к серверу: {e}')
|
||||
return config
|
||||
|
||||
def send_metrics(config, metrics, services):
|
||||
\\\"\\\"\\\"Отправка метрик и сервисов на сервер\\\"\\\"\\\"
|
||||
data = {
|
||||
'token': config['token'],
|
||||
'metrics': metrics,
|
||||
'services': services
|
||||
}
|
||||
|
||||
try:
|
||||
response = requests.post(
|
||||
config['api_url'],
|
||||
json=data,
|
||||
timeout=10
|
||||
)
|
||||
if response.status_code == 200:
|
||||
print(f'{datetime.now().strftime(\\\"%Y-%m-%d %H:%M:%S\\\")} - Метрики отправлены успешно')
|
||||
return True
|
||||
else:
|
||||
print(f'Ошибка отправки метрик: {response.status_code}')
|
||||
return False
|
||||
except Exception as e:
|
||||
print(f'Ошибка отправки метрик: {e}')
|
||||
return False
|
||||
|
||||
def main():
|
||||
\\\"\\\"\\\"Главная функция агента\\\"\\\"\\\"
|
||||
print('Агент мониторинга запущен...')
|
||||
|
||||
# Загружаем конфигурацию
|
||||
config = get_config_from_server()
|
||||
if not config:
|
||||
print('Не удалось загрузить конфигурацию')
|
||||
return
|
||||
|
||||
interval = config.get('interval_seconds', 60)
|
||||
monitor_services = config.get('monitor_services', [])
|
||||
|
||||
print(f'Интервал отправки: {interval} сек')
|
||||
print(f'Мониторинг сервисов: {\\\"включен\\\" if monitor_services else \\\"все сервисы\\\"}')
|
||||
|
||||
last_config_update = time.time()
|
||||
|
||||
while True:
|
||||
try:
|
||||
# Проверяем нужно ли обновить конфиг (каждые 5 минут)
|
||||
if time.time() - last_config_update > 300:
|
||||
print('Проверка обновления конфигурации...')
|
||||
config = get_config_from_server()
|
||||
last_config_update = time.time()
|
||||
|
||||
# Обновляем интервал если изменился
|
||||
interval = config.get('interval_seconds', 60)
|
||||
monitor_services = config.get('monitor_services', [])
|
||||
|
||||
# Собираем метрики
|
||||
metrics = get_metrics()
|
||||
|
||||
# Собираем сервисы
|
||||
services = get_services()
|
||||
|
||||
# Если указаны конкретные сервисы для мониторинга - фильтруем
|
||||
if monitor_services:
|
||||
services = [s for s in services if s['name'] in monitor_services]
|
||||
print(f'Мониторинг {len(services)} сервисов: {[s[\\\"name\\\"] for s in services]}')
|
||||
|
||||
# Отправляем данные
|
||||
success = send_metrics(config, metrics, services)
|
||||
|
||||
if success:
|
||||
print(f'Метрики отправлены: CPU={metrics[\\\"cpu_load\\\"]}%, RAM={metrics[\\\"ram_used\\\"]}%, Disk={metrics[\\\"disk_used\\\"]}%')
|
||||
else:
|
||||
print('Ошибка отправки метрик')
|
||||
|
||||
# Ждем указанный интервал
|
||||
time.sleep(interval)
|
||||
|
||||
except KeyboardInterrupt:
|
||||
print('Агент остановлен')
|
||||
break
|
||||
except Exception as e:
|
||||
print(f'Ошибка: {e}')
|
||||
time.sleep(10)
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
PYTHON_EOF
|
||||
CONFIG_EOF
|
||||
|
||||
# Создаем systemd сервис
|
||||
cat > /etc/systemd/system/server-monitor-agent.service << 'SERVICE_EOF'
|
||||
echo '[6/6] Регистрация системной службы...'
|
||||
cat > /etc/systemd/system/server-monitor-agent.service << SERVICE_EOF
|
||||
[Unit]
|
||||
Description=Server Monitor Agent
|
||||
After=network.target
|
||||
|
|
@ -285,8 +107,8 @@ After=network.target
|
|||
[Service]
|
||||
Type=simple
|
||||
User=root
|
||||
WorkingDirectory=/opt/server-monitor-agent
|
||||
ExecStart=/usr/bin/python3 /opt/server-monitor-agent/agent.py
|
||||
WorkingDirectory=$INSTALL_DIR
|
||||
ExecStart=/usr/bin/python3 $INSTALL_DIR/agent.py
|
||||
Restart=always
|
||||
RestartSec=10
|
||||
|
||||
|
|
@ -294,22 +116,22 @@ RestartSec=10
|
|||
WantedBy=multi-user.target
|
||||
SERVICE_EOF
|
||||
|
||||
# Делаем скрипт исполняемым
|
||||
chmod +x agent.py
|
||||
|
||||
# Перезагружаем systemd
|
||||
# Активируем и запускаем сервис
|
||||
systemctl daemon-reload
|
||||
|
||||
# Включаем автозапуск сервиса
|
||||
systemctl enable server-monitor-agent
|
||||
|
||||
# Запускаем сервис
|
||||
systemctl stop server-monitor-agent 2>/dev/null || true
|
||||
systemctl start server-monitor-agent
|
||||
|
||||
echo 'Агент мониторинга установлен и запущен!'
|
||||
echo 'Статус сервиса:'
|
||||
systemctl status server-monitor-agent
|
||||
";
|
||||
echo ''
|
||||
echo '=============================================='
|
||||
echo ' Агент мониторинга успешно установлен!'
|
||||
echo '=============================================='
|
||||
echo ''
|
||||
echo 'Директория: $INSTALL_DIR'
|
||||
echo 'Логи: journalctl -u server-monitor-agent -f'
|
||||
echo 'Статус: systemctl status server-monitor-agent'
|
||||
echo ''
|
||||
BASH;
|
||||
|
||||
$response->getBody()->write($script);
|
||||
return $response
|
||||
|
|
@ -317,6 +139,44 @@ systemctl status server-monitor-agent
|
|||
->withHeader('Content-Disposition', 'attachment; filename="install.sh"');
|
||||
}
|
||||
|
||||
/**
 * Serve agent.py to a caller that presents a valid agent token.
 *
 * The token is read from the `token` query parameter, hashed with SHA-256
 * and looked up in `agent_tokens.token_hash`. On success the token's
 * `last_used_at` is updated and the agent source is returned as an
 * attachment named `agent.py`.
 *
 * Responses: 403 when the token is missing, malformed or unknown; 404 when
 * agent.py does not exist; 500 when it exists but cannot be read.
 *
 * @param Request  $request  Incoming request carrying the `token` query parameter.
 * @param Response $response Response object to populate.
 * @param array    $args     Route arguments (unused).
 * @return Response
 */
public function downloadAgent(Request $request, Response $response, $args)
{
    $queryParams = $request->getQueryParams();
    $token = $queryParams['token'] ?? null;

    // A query such as ?token[]=x yields an array, which hash() cannot accept.
    if (empty($token) || !is_string($token)) {
        $response->getBody()->write('Token is required');
        return $response->withStatus(403);
    }

    $tokenHash = hash('sha256', $token);
    $stmt = $this->pdo->prepare("SELECT server_id FROM agent_tokens WHERE token_hash = :hash LIMIT 1");
    $stmt->execute([':hash' => $tokenHash]);
    $result = $stmt->fetch();

    if (!$result) {
        $response->getBody()->write('Invalid token');
        return $response->withStatus(403);
    }

    $stmt = $this->pdo->prepare("UPDATE agent_tokens SET last_used_at = NOW() WHERE token_hash = :hash");
    $stmt->execute([':hash' => $tokenHash]);

    $agentPath = dirname(__DIR__, 2) . '/agent.py';
    if (!file_exists($agentPath)) {
        $response->getBody()->write('Agent not found');
        return $response->withStatus(404);
    }

    $content = file_get_contents($agentPath);
    if ($content === false) {
        $response->getBody()->write('Agent could not be read');
        return $response->withStatus(500);
    }

    // write() returns the number of bytes written, not the response, so the
    // headers must be applied to $response in a separate expression.
    $response->getBody()->write($content);

    return $response
        ->withHeader('Content-Type', 'text/plain; charset=UTF-8')
        ->withHeader('Content-Disposition', 'attachment; filename="agent.py"');
}
|
||||
|
||||
public function getConfig(Request $request, Response $response, $args)
|
||||
{
|
||||
$serverId = $args['id'];
|
||||
|
|
|
|||
|
|
@ -200,8 +200,31 @@ class ServerDetailController extends Model
|
|||
];
|
||||
}
|
||||
|
||||
// Типы метрик
|
||||
$stmt = $this->pdo->query("SELECT id, name, unit FROM metric_names WHERE name NOT LIKE '%\_proc' AND name NOT LIKE 'disk_total_gb_%' AND name != 'disk_used' AND name != 'ram_total_gb' AND name NOT IN ('net_in', 'net_out') AND name NOT LIKE 'network_%' ORDER BY name");
|
||||
// Типы метрик — только те что отображаются на графиках и есть у сервера
|
||||
$stmt = $this->pdo->prepare("
|
||||
SELECT DISTINCT mn.id, mn.name, mn.unit
|
||||
FROM metric_names mn
|
||||
JOIN server_metrics sm ON sm.metric_name_id = mn.id
|
||||
WHERE sm.server_id = :id
|
||||
AND (
|
||||
mn.name IN ('cpu_load', 'ram_used')
|
||||
OR mn.name LIKE 'disk_used_%'
|
||||
OR mn.name LIKE 'net_in_%'
|
||||
OR mn.name LIKE 'net_out_%'
|
||||
OR mn.name LIKE 'temp_%'
|
||||
)
|
||||
ORDER BY
|
||||
CASE
|
||||
WHEN mn.name = 'cpu_load' THEN 1
|
||||
WHEN mn.name = 'ram_used' THEN 2
|
||||
WHEN mn.name LIKE 'disk_used_%' THEN 3
|
||||
WHEN mn.name LIKE 'net_in_%' THEN 4
|
||||
WHEN mn.name LIKE 'net_out_%' THEN 5
|
||||
WHEN mn.name LIKE 'temp_%' THEN 6
|
||||
END,
|
||||
mn.name
|
||||
");
|
||||
$stmt->execute([':id' => $id]);
|
||||
$allMetricTypes = $stmt->fetchAll();
|
||||
|
||||
// Сервисы
|
||||
|
|
@ -286,7 +309,31 @@ class ServerDetailController extends Model
|
|||
$id = $args['id'];
|
||||
$params = $request->getParsedBody();
|
||||
|
||||
$stmt = $this->pdo->query("SELECT id, name FROM metric_names WHERE name NOT LIKE '%\_proc' AND name NOT LIKE 'disk_total_gb_%' AND name != 'disk_used' AND name != 'ram_total_gb' AND name NOT IN ('net_in', 'net_out') AND name NOT LIKE 'network_%' ORDER BY name");
|
||||
// Получаем только метрики которые есть у сервера и отображаются на графиках
|
||||
$stmt = $this->pdo->prepare("
|
||||
SELECT DISTINCT mn.id, mn.name, mn.unit
|
||||
FROM metric_names mn
|
||||
JOIN server_metrics sm ON sm.metric_name_id = mn.id
|
||||
WHERE sm.server_id = :id
|
||||
AND (
|
||||
mn.name IN ('cpu_load', 'ram_used')
|
||||
OR mn.name LIKE 'disk_used_%'
|
||||
OR mn.name LIKE 'net_in_%'
|
||||
OR mn.name LIKE 'net_out_%'
|
||||
OR mn.name LIKE 'temp_%'
|
||||
)
|
||||
ORDER BY
|
||||
CASE
|
||||
WHEN mn.name = 'cpu_load' THEN 1
|
||||
WHEN mn.name = 'ram_used' THEN 2
|
||||
WHEN mn.name LIKE 'disk_used_%' THEN 3
|
||||
WHEN mn.name LIKE 'net_in_%' THEN 4
|
||||
WHEN mn.name LIKE 'net_out_%' THEN 5
|
||||
WHEN mn.name LIKE 'temp_%' THEN 6
|
||||
END,
|
||||
mn.name
|
||||
");
|
||||
$stmt->execute([':id' => $id]);
|
||||
$metricTypes = $stmt->fetchAll();
|
||||
|
||||
$stmt = $this->pdo->prepare("DELETE FROM metric_thresholds WHERE server_id = :server_id");
|
||||
|
|
|
|||
|
|
@ -169,6 +169,7 @@
|
|||
{% endif %}
|
||||
{% endfor %}
|
||||
|
||||
{% if net_interfaces|length > 0 %}
|
||||
{% for iface in net_interfaces %}
|
||||
{% if metrics['net_in_' ~ iface] is defined and metrics['net_out_' ~ iface] is defined %}
|
||||
<div class="row">
|
||||
|
|
@ -185,11 +186,23 @@
|
|||
</div>
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
{% else %}
|
||||
<div class="alert alert-warning mb-4">
|
||||
<i class="fas fa-network-wired"></i> Данные о сетевых интерфейсах не получены
|
||||
</div>
|
||||
{% endif %}
|
||||
|
||||
|
||||
|
||||
|
||||
<!-- Температуры: один общий график -->
|
||||
{% set has_temps = false %}
|
||||
{% for metricName in metrics|keys %}
|
||||
{% if metricName starts with 'temp_' %}
|
||||
{% set has_temps = true %}
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
|
||||
<div class="row mb-4">
|
||||
<div class="col-12">
|
||||
<div class="card">
|
||||
|
|
@ -197,14 +210,28 @@
|
|||
<h6 class="mb-0"><i class="fas fa-thermometer-half"></i> Температуры</h6>
|
||||
</div>
|
||||
<div class="card-body">
|
||||
{% if has_temps %}
|
||||
<canvas id="chart-temperatures" width="100%" height="300"></canvas>
|
||||
{% else %}
|
||||
<div class="alert alert-info mb-0">
|
||||
<i class="fas fa-thermometer-half"></i> Температурные датчики недоступны (возможно виртуальный сервер)
|
||||
</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Диски: Doughnut графики -->
|
||||
{% set has_disk_parts = false %}
|
||||
{% for metricName in metrics|keys %}
|
||||
{% if metricName starts with 'disk_used_' and metricName != 'disk_used' %}
|
||||
{% set has_disk_parts = true %}
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
|
||||
<div class="row mb-3">
|
||||
{% if has_disk_parts %}
|
||||
{% for metricName, metricData in metrics %}
|
||||
{% if metricName starts with 'disk_used_' and metricName != 'disk_used' %}
|
||||
<div class="col-md-4 mb-3">
|
||||
|
|
@ -235,6 +262,13 @@
|
|||
</div>
|
||||
{% endif %}
|
||||
{% endfor %}
|
||||
{% else %}
|
||||
<div class="col-12">
|
||||
<div class="alert alert-warning">
|
||||
<i class="fas fa-hdd"></i> Данные о разделах диска не получены. Проверьте работу агента.
|
||||
</div>
|
||||
</div>
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
|
@ -412,12 +446,36 @@
|
|||
<div class="col-12">
|
||||
<form method="post" action="/servers/{{ server.id }}/thresholds">
|
||||
{% for metricType in allMetricTypes %}
|
||||
{% set metricUnit = '%' %}
|
||||
{% set metricLabel = metricType.name %}
|
||||
{% if metricType.name starts with 'temp_' %}
|
||||
{% set metricUnit = '°C' %}
|
||||
{% set metricLabel = 'Температура ' ~ (metricType.name|replace({'temp_': '', '_': ' '}))|title %}
|
||||
{% elseif metricType.name == 'cpu_load' %}
|
||||
{% set metricLabel = 'Загрузка CPU' %}
|
||||
{% elseif metricType.name == 'ram_used' %}
|
||||
{% set metricLabel = 'Использование RAM' %}
|
||||
{% elseif metricType.name starts with 'disk_used_' %}
|
||||
{% set iface = metricType.name|replace({'disk_used_': ''}) %}
|
||||
{% if iface == 'root' %}{% set metricLabel = 'Диск (корень /)' %}
|
||||
{% elseif iface == 'home' %}{% set metricLabel = 'Диск (/home)' %}
|
||||
{% elseif iface == 'boot' %}{% set metricLabel = 'Диск (/boot)' %}
|
||||
{% elseif iface == 'mnt_data' %}{% set metricLabel = 'Диск (/mnt/data)' %}
|
||||
{% else %}{% set metricLabel = 'Диск (/' ~ (iface|replace({'_': '/'})) ~ ')' %}
|
||||
{% endif %}
|
||||
{% elseif metricType.name starts with 'net_in_' %}
|
||||
{% set iface = metricType.name|replace({'net_in_': ''}) %}
|
||||
{% set metricLabel = 'Сеть входящая (' ~ iface ~ ')' %}
|
||||
{% elseif metricType.name starts with 'net_out_' %}
|
||||
{% set iface = metricType.name|replace({'net_out_': ''}) %}
|
||||
{% set metricLabel = 'Сеть исходящая (' ~ iface ~ ')' %}
|
||||
{% endif %}
|
||||
<div class="card mb-2">
|
||||
<div class="card-body py-2">
|
||||
<div class="row align-items-center">
|
||||
<div class="col-md-3 mb-2 mb-md-0">
|
||||
<strong>{{ metricType.name|replace({'_': ' ', 'load': 'загрузка', 'used': 'использование'})|title }}</strong>
|
||||
{% if metricType.unit %}<small class="text-muted">({{ metricType.unit }})</small>{% endif %}
|
||||
<strong>{{ metricLabel }}</strong>
|
||||
<small class="text-muted">({{ metricUnit }})</small>
|
||||
</div>
|
||||
<div class="col-md-3 mb-2 mb-md-0">
|
||||
<div class="input-group input-group-sm" title="Порог предупреждения">
|
||||
|
|
@ -430,7 +488,7 @@
|
|||
{% if existingThresholds[metricType.name].warning is defined %}
|
||||
value="{{ existingThresholds[metricType.name].warning }}"
|
||||
{% endif %}>
|
||||
<span class="input-group-text">%</span>
|
||||
<span class="input-group-text">{{ metricUnit }}</span>
|
||||
</div>
|
||||
</div>
|
||||
<div class="col-md-3 mb-2 mb-md-0">
|
||||
|
|
@ -444,7 +502,7 @@
|
|||
{% if existingThresholds[metricType.name].critical is defined %}
|
||||
value="{{ existingThresholds[metricType.name].critical }}"
|
||||
{% endif %}>
|
||||
<span class="input-group-text">%</span>
|
||||
<span class="input-group-text">{{ metricUnit }}</span>
|
||||
</div>
|
||||
</div>
|
||||
<div class="col-md-3">
|
||||
|
|
@ -566,7 +624,7 @@ document.addEventListener('DOMContentLoaded', function() {
|
|||
});
|
||||
|
||||
// Параметры системы
|
||||
var ramTotalGB = {{ metrics['ram_total_gb'] is defined ? metrics['ram_total_gb'][0].value : 0 }};
|
||||
var ramTotalGB = {{ metrics['ram_total_gb'] is defined ? metrics['ram_total_gb'][0].value : 'null' }};
|
||||
var diskTotalGB = {
|
||||
{% for m, _data in metrics %}
|
||||
{% if m starts with 'disk_total_gb_' %}
|
||||
|
|
@ -664,12 +722,18 @@ const chart{{ metricName|replace({'-': '_', '.': '_'}) }} = new Chart(ctx{{ metr
|
|||
lines.push('Время: ' + time);
|
||||
{% if metricName == 'ram_used' %}
|
||||
var ramPct = data{{ metricName }}[dataIndex];
|
||||
if (ramTotalGB !== null) {
|
||||
var ramUsed = (ramPct / 100 * ramTotalGB).toFixed(1);
|
||||
var ramFree = (ramTotalGB - ramUsed).toFixed(1);
|
||||
lines.push('Всего: ' + ramTotalGB.toFixed(1) + ' ГБ');
|
||||
lines.push('Занято: ' + ramUsed + ' ГБ');
|
||||
lines.push('Свободно: ' + ramFree + ' ГБ');
|
||||
lines.push('');
|
||||
} else {
|
||||
lines.push('RAM: ' + ramPct + '%');
|
||||
lines.push('(данные о памяти недоступны)');
|
||||
lines.push('');
|
||||
}
|
||||
if (data.top_ram && data.top_ram.length > 0) {
|
||||
lines.push('TOP RAM:');
|
||||
data.top_ram.forEach(function(proc) {
|
||||
|
|
|
|||
Loading…
Reference in New Issue