fix: Update agent install, disk metrics, fallback blocks, and dynamic thresholds

BREAKING: install.sh now downloads agent from server instead of embedding it

Changes:
- AgentController: downloadAgent() method for serving agent.py with token auth
- AgentController: rewrite generateInstallScript() to curl agent from server
- agent.py: copy production version from server (with temp, disk, network metrics)
- agent.py: fix get_disk_metrics() to use priority mountpoints (/, /home, etc)
- agent.py: fix disk_total_gb collection to use priority mountpoints
- detail.twig: add fallback blocks for temperatures (alert-info)
- detail.twig: add fallback blocks for disk doughnuts (alert-warning)
- detail.twig: add fallback blocks for network graphs (alert-warning)
- detail.twig: add null check for ramTotalGB in tooltip
- detail.twig: improve thresholds form with human-readable labels and units
- ServerDetailController: query only the metrics that exist for the server and are displayed on graphs

For server 3 (mirv.top):
- After deploy, download new install.sh and reinstall agent
- This will add disk_used_root, ram_total_gb, temperatures support
This commit is contained in:
mirivlad 2026-04-17 09:25:43 +08:00
parent 3255bfae29
commit bce4c2e2d0
7 changed files with 570 additions and 333 deletions

Binary file not shown.

322
agent.py
View File

@ -8,11 +8,119 @@ import subprocess
import os
from datetime import datetime
# Name prefixes of virtual and service interfaces that are skipped.
SKIP_INTERFACE_PREFIXES = (
    'lo',
    'docker',
    'veth',
    'br-',
    'tun',
    'tap',
    'wg',
    'virbr',
    'vmnet',
    'vmxnet',
)

# Previous net_io samples per interface, kept between calls to compute deltas.
_prev_net_io = {}
def _is_real_interface(name, stats):
    """Return True when the interface should be included in network metrics.

    An interface is rejected when its name starts with one of
    SKIP_INTERFACE_PREFIXES, when ``stats.isup`` is falsy, or when
    ``stats.speed`` is zero or negative.
    """
    # str.startswith accepts a tuple, so a single call tests every prefix.
    if name.startswith(SKIP_INTERFACE_PREFIXES):
        return False
    return bool(stats.isup) and not stats.speed <= 0
def get_network_metrics(interval=60):
    """Return per-interface network utilisation as a percentage of link speed.

    For every interface accepted by ``_is_real_interface`` the current byte
    counters are compared with the sample stored in ``_prev_net_io`` by the
    previous call. The first call for an interface only records a baseline,
    so it produces no metrics for that interface.

    Args:
        interval: Not used by the calculation (the real time elapsed between
            samples is measured instead); kept so existing callers that pass
            it keep working.

    Returns:
        dict mapping ``net_in_<iface>`` / ``net_out_<iface>`` to a percentage
        between 0.0 and 100.0, rounded to two decimals. Dashes in interface
        names are replaced by underscores. The dict is empty on the first
        call and when collection fails (the error is printed, not raised).
    """
    import time  # local import so the function does not depend on a module-level one

    metrics = {}
    try:
        counters = psutil.net_io_counters(pernic=True)
        stats = psutil.net_if_stats()
        now = time.time()

        for name, counter in counters.items():
            iface_stats = stats.get(name)
            if iface_stats is None or not _is_real_interface(name, iface_stats):
                continue

            # The link speed is treated as megabits per second; convert to bytes per second.
            # _is_real_interface already rejected speed <= 0, so this is never zero.
            speed_bps = iface_stats.speed * 1000000 / 8

            prev = _prev_net_io.get(name)
            if prev is not None:
                elapsed = now - prev['time']
                rx_delta = counter.bytes_recv - prev['rx']
                tx_delta = counter.bytes_sent - prev['tx']
                # A negative delta means the counters were reset or wrapped
                # since the last sample; reporting it would yield a negative
                # percentage, so that sample is skipped and only the baseline
                # below is refreshed.
                if elapsed > 0 and rx_delta >= 0 and tx_delta >= 0:
                    rx_pct = min((rx_delta / elapsed) / speed_bps * 100, 100.0)
                    tx_pct = min((tx_delta / elapsed) / speed_bps * 100, 100.0)
                    iface_key = name.replace('-', '_')
                    metrics[f'net_in_{iface_key}'] = round(rx_pct, 2)
                    metrics[f'net_out_{iface_key}'] = round(tx_pct, 2)

            _prev_net_io[name] = {
                'rx': counter.bytes_recv,
                'tx': counter.bytes_sent,
                'time': now,
            }
    except Exception as e:
        # Best effort: network metrics must never break the rest of the report.
        print(f'Ошибка сбора сетевых метрик: {e}')

    return metrics
def _is_real_partition(mountpoint, fstype):
    """Return True when the partition is a real one (not tmpfs, docker, snap, etc.)."""
    ignored_fstypes = {
        'tmpfs', 'devtmpfs', 'overlay', 'squashfs', 'snap',
        'devpts', 'proc', 'sysfs', 'cgroup', 'cgroup2',
        'pstore', 'hugetlbfs', 'mqueue', 'debugfs',
        'tracefs', 'bpf', 'fusectl', 'configfs',
        'securityfs', 'ramfs',
    }
    ignored_mounts = {
        '/run', '/run/lock', '/sys', '/proc', '/dev',
        '/dev/shm', '/dev/pts', '/sys/fs/cgroup',
        # EFI partition: too small to be informative
        '/boot/efi',
    }
    return fstype not in ignored_fstypes and mountpoint not in ignored_mounts
def get_disk_metrics():
    """Collect disk usage percentages for mounted partitions.

    A fixed list of priority paths is measured first, then every partition
    reported by ``psutil.disk_partitions(all=False)`` that passes
    ``_is_real_partition`` and has not been measured already.

    Returns:
        dict with ``disk_used_<name>`` (percent used, one decimal) for every
        path that could be measured, where ``<name>`` is the path with the
        surrounding slashes stripped and inner slashes replaced by
        underscores (``root`` for ``/``). When any capacity was found it also
        contains ``disk_used``: used/total across the priority paths, as a
        percentage with one decimal.
    """
    metrics = {}
    total_used = 0
    total_capacity = 0
    # Device ids already added to the totals. psutil.disk_usage() reports the
    # filesystem that contains the path, so e.g. /home and /var on the root
    # filesystem would otherwise add the same disk to the aggregate again.
    # NOTE(review): filesystems exposing several device ids (e.g. btrfs
    # subvolumes) may still be counted more than once - confirm if relevant.
    counted_devices = set()

    priority_mounts = ['/', '/home', '/boot', '/var', '/opt', '/data', '/mnt', '/srv', '/tmp']
    for mountpoint in priority_mounts:
        try:
            usage = psutil.disk_usage(mountpoint)
            device_id = os.stat(mountpoint).st_dev
        except OSError:
            # Covers FileNotFoundError and PermissionError (both OSError subclasses).
            continue
        name = mountpoint.strip('/').replace('/', '_') or 'root'
        metrics[f'disk_used_{name}'] = round(usage.percent, 1)
        if device_id not in counted_devices:
            counted_devices.add(device_id)
            total_used += usage.used
            total_capacity += usage.total

    for part in psutil.disk_partitions(all=False):
        name = part.mountpoint.strip('/').replace('/', '_') or 'root'
        # Compare against the stored key; the bare name is never a key here.
        if f'disk_used_{name}' in metrics:
            continue
        if not _is_real_partition(part.mountpoint, part.fstype):
            continue
        try:
            usage = psutil.disk_usage(part.mountpoint)
        except OSError:
            continue
        metrics[f'disk_used_{name}'] = round(usage.percent, 1)

    if total_capacity > 0:
        metrics['disk_used'] = round((total_used / total_capacity) * 100, 1)

    return metrics
def get_metrics():
"""Сбор системных метрик"""
cpu_percent = psutil.cpu_percent(interval=1)
memory = psutil.virtual_memory()
disk_usage = psutil.disk_usage('/')
# Дисковые метрики для всех реальных разделов
disk_metrics = get_disk_metrics()
# Получаем сетевую статистику
try:
@ -20,26 +128,70 @@ def get_metrics():
except:
net_io = None
return {
result = {
'cpu_load': cpu_percent,
'ram_used': memory.percent,
'disk_used': disk_usage.percent
}
result.update(disk_metrics)
# Метрики использования сети
net_metrics = get_network_metrics()
result.update(net_metrics)
# RAM total GB
result["ram_total_gb"] = round(memory.total / (1024**3), 1)
# Disk total GB - сначала приоритетные mountpoints
priority_mounts = ['/', '/home', '/boot', '/var', '/opt', '/data', '/mnt', '/srv', '/tmp']
for mountpoint in priority_mounts:
try:
usage = psutil.disk_usage(mountpoint)
name = mountpoint.strip("/").replace("/", "_") or "root"
if f"disk_total_gb_{name}" not in result:
result[f"disk_total_gb_{name}"] = round(usage.total / (1024**3), 1)
except (PermissionError, OSError, FileNotFoundError):
pass
for part in psutil.disk_partitions(all=False):
try:
usage = psutil.disk_usage(part.mountpoint)
name = part.mountpoint.strip("/").replace("/", "_") or "root"
if f"disk_total_gb_{name}" not in result:
result[f"disk_total_gb_{name}"] = round(usage.total / (1024**3), 1)
except (PermissionError, OSError):
pass
if net_metrics:
print(f" Сетевые метрики: {net_metrics}")
# Сетевые метрики
if net_io:
result['network_rx'] = round(net_io.bytes_recv / (1024 * 1024), 2)
result['network_tx'] = round(net_io.bytes_sent / (1024 * 1024), 2)
return result
def get_top_processes(process_type='cpu'):
"""Сбор топ-5 процессов по CPU или RAM"""
processes = []
try:
for proc in psutil.process_iter(['pid', 'name', 'cpu_percent', 'memory_percent']):
for proc in psutil.process_iter(['pid', 'name', 'cpu_percent', 'memory_percent', 'cmdline']):
try:
info = proc.info
if info['cpu_percent'] is None or info['memory_percent'] is None:
continue
cmdline = info.get('cmdline') or []
if cmdline:
full_cmd = ' '.join(cmdline)
cmd_display = full_cmd[:120] + ('...' if len(full_cmd) > 120 else '')
else:
cmd_display = info.get('name', '')
processes.append({
'pid': info['pid'],
'name': info['name'],
'cmdline': cmd_display,
'value': round(info[process_type + '_percent'], 1)
})
except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
@ -61,45 +213,139 @@ def get_top_processes(process_type='cpu'):
return []
def get_services():
    """Collect the list of services via systemctl (list-unit-files + list-units).

    ``list-unit-files`` provides every installed service unit, including
    ones that are not currently loaded; ``list-units --all`` provides the
    current load/active/sub state of the loaded ones. The two are merged so
    that each installed service is reported exactly once.

    Returns:
        list of dicts with the keys ``name``, ``status`` (``running``,
        ``stopped`` or ``unknown``), ``load_state``, ``active_state`` and
        ``sub_state``. An empty list is returned when systemctl cannot be
        run or its output cannot be processed (the error is printed).
    """
    try:
        # 1. Every installed service unit (including dead/unloaded ones).
        res_files = subprocess.run(
            ['systemctl', 'list-unit-files', '--type=service', '--no-pager', '--no-legend'],
            capture_output=True, text=True, timeout=10)
        # 2. Current state of loaded units. --plain omits the status bullet
        #    that would otherwise shift the columns of some rows.
        res_units = subprocess.run(
            ['systemctl', 'list-units', '--type=service', '--all', '--no-pager',
             '--no-legend', '--plain'],
            capture_output=True, text=True, timeout=10)

        # Installed unit names, in the order systemctl printed them.
        unit_names = []
        seen = set()
        for line in res_files.stdout.splitlines():
            parts = line.split()
            if parts and parts[0].endswith('.service') and parts[0] not in seen:
                seen.add(parts[0])
                unit_names.append(parts[0])

        # Current state per loaded unit.
        running_states = {}
        for line in res_units.stdout.splitlines():
            tokens = line.split()
            # Defensive: drop a leading marker token if one was still printed.
            if tokens and not tokens[0].endswith('.service'):
                tokens = tokens[1:]
            if len(tokens) >= 4 and tokens[0].endswith('.service'):
                running_states[tokens[0]] = {
                    'load_state': tokens[1],
                    'active_state': tokens[2],
                    'sub_state': tokens[3],
                }

        services = []
        for svc_name in unit_names:
            state = running_states.get(svc_name)
            if state is not None:
                load = state['load_state']
                active = state['active_state']
                sub = state['sub_state']
            else:
                # The unit file exists but the unit is not loaded (dead).
                load = 'loaded'
                active = 'inactive'
                sub = 'dead'

            if active == 'active':
                status = 'running'
            elif active in ('inactive', 'failed', 'deactivating'):
                status = 'stopped'
            else:
                status = 'unknown'

            services.append({
                'name': svc_name,
                'status': status,
                'load_state': load,
                'active_state': active,
                'sub_state': sub,
            })

        return services
    except Exception as e:
        # Best effort: a missing or failing systemctl must not stop the agent.
        print(f"Ошибка получения списка сервисов: {e}")
        return []
def get_temperatures():
    """Collect temperatures (CPU, disks, GPU).

    Every source is best effort: a missing tool or sensor simply contributes
    no keys.

    Returns:
        dict that may contain ``temp_cpu``, ``temp_disk_<device>`` (one per
        disk that reported a value), and either ``temp_gpu`` (single GPU) or
        ``temp_gpu_<index>`` (several GPUs).
    """
    temps = {}
    cpu_temp = _cpu_temperature()
    if cpu_temp is not None:
        temps['temp_cpu'] = cpu_temp
    temps.update(_disk_temperatures())
    temps.update(_gpu_temperatures())
    return temps


def _cpu_temperature():
    """Return the highest CPU reading from psutil, or None when unavailable."""
    try:
        sensors = psutil.sensors_temperatures()
        if not sensors:
            return None
        readings = [
            entry.current
            for chip, entries in sensors.items()
            if chip.lower() in ('coretemp', 'k10temp', 'zenpower')
            for entry in entries
            if entry.current
        ]
        if not readings:
            # No known CPU chip found: fall back to the hottest of all sensors.
            readings = [
                entry.current
                for entries in sensors.values()
                for entry in entries
                if entry.current
            ]
        return max(readings) if readings else None
    except Exception:
        # Deliberately broad: sensor support varies by platform and must
        # never break metric collection.
        return None


def _disk_temperatures():
    """Return ``temp_disk_<device>`` readings obtained from smartctl."""
    import glob

    temps = {}
    disks = glob.glob('/dev/sd[a-z]') + glob.glob('/dev/nvme[0-9]n1')
    for disk in disks:
        try:
            res = subprocess.run(['smartctl', '-n', 'standby', '-A', disk],
                                 capture_output=True, text=True, timeout=5)
        except FileNotFoundError:
            # smartctl is not installed; no point in trying the other disks.
            break
        except (subprocess.SubprocessError, OSError, ValueError):
            # Timeout or undecodable output on one disk must not hide the others.
            continue

        # The smartctl exit status is a bitmask. Bits 0-2 mean the command or
        # device access failed; higher bits only flag disk health conditions
        # while the attribute output is still valid.
        if res.returncode & 0b111 or 'STANDBY' in res.stdout.upper():
            continue

        disk_name = disk.split('/')[-1]
        for line in res.stdout.split('\n'):
            if 'Temperature' not in line:
                continue
            # Scan from the end of the line for a number in the range 10-100.
            for token in reversed(line.split()):
                try:
                    value = int(token)
                except ValueError:
                    continue
                if 10 < value < 100:
                    temps[f'temp_disk_{disk_name}'] = float(value)
                    break
    return temps


def _gpu_temperatures():
    """Return GPU temperatures reported by nvidia-smi."""
    try:
        res = subprocess.run(['nvidia-smi', '--query-gpu=temperature.gpu', '--format=csv,noheader'],
                             capture_output=True, text=True, timeout=5)
    except (subprocess.SubprocessError, OSError, ValueError):
        # nvidia-smi missing, timed out, or produced undecodable output.
        return {}
    if res.returncode != 0:
        return {}

    temps = {}
    lines = res.stdout.strip().split('\n')
    if len(lines) == 1:
        try:
            temps['temp_gpu'] = float(lines[0])
        except ValueError:
            pass  # non-numeric output: report nothing for the GPU
    else:
        for i, line in enumerate(lines):
            try:
                temps[f'temp_gpu_{i}'] = float(line)
            except ValueError:
                pass  # skip a non-numeric line, keep the other GPUs
    return temps
def send_metrics():
"""Отправка метрик на сервер"""
with open('/opt/server-monitor-agent/config.json', 'r') as f:
@ -110,6 +356,8 @@ def send_metrics():
# Собираем метрики
metrics = get_metrics()
temps = get_temperatures()
metrics.update(temps)
# Собираем топ-процессы
top_cpu = get_top_processes('cpu')

View File

@ -0,0 +1,17 @@
-- 008: Automatic cleanup of old metrics (older than 60 days)
-- Runs automatically every day at 03:00
--
-- Creates the cleanup event (works as mon_user if it has the EVENT privilege).
-- If mon_user cannot create the event, run it manually as root:
--   CREATE EVENT ... (see below)
--
-- For Docker: event_scheduler is enabled through the docker-compose command
-- For a manual install: add event_scheduler=ON in /etc/mysql/mariadb.conf.d/

-- If the privileges are available, create the event:
CREATE EVENT IF NOT EXISTS daily_metrics_cleanup
    ON SCHEDULE
        EVERY 1 DAY
        STARTS (CURRENT_DATE + INTERVAL 1 DAY + INTERVAL 3 HOUR)
    ON COMPLETION PRESERVE
    DO
        DELETE FROM server_metrics
        WHERE created_at < DATE_SUB(NOW(), INTERVAL 60 DAY);

View File

@ -233,6 +233,7 @@ $app->get('/api/status', function (Request $request, Response $response, $args)
$app->get('/agent/install.sh', [$agentController, 'generateInstallScript']);
$app->get('/agent/install.ps1', [$agentController, 'generateWindowsInstallScript']);
$app->get('/agent/install.bat', [$agentController, 'generateWindowsBatScript']);
$app->get('/agent/agent.py', [$agentController, 'downloadAgent']);
// Run app
$app->run();

View File

@ -17,7 +17,6 @@ class AgentController extends Model
$token = $queryParams['token'] ?? null;
$server_id = $queryParams['server_id'] ?? null;
// Если передан server_id, получаем оригинальный токен из зашифрованного
if (!empty($server_id) && empty($token)) {
$stmt = $this->pdo->prepare("SELECT encrypted_token FROM agent_tokens WHERE server_id = :server_id LIMIT 1");
$stmt->execute([':server_id' => $server_id]);
@ -34,250 +33,73 @@ class AgentController extends Model
}
$apiUrl = 'https://mon.mirv.top/api/v1/metrics';
$agentDownloadUrl = 'https://mon.mirv.top/agent/agent.py?token=' . $token;
// Формируем скрипт с прямой подстановкой значений
$script = "#!/bin/bash
$script = <<<BASH
#!/bin/bash
# Скрипт установки агента мониторинга с поддержкой сервисов
# =====================================================
# Скрипт установки агента мониторинга
# Сгенерировано автоматически
# =====================================================
TOKEN='" . $token . "'
API_URL='" . $apiUrl . "'
set -e
echo 'Установка агента мониторинга...'
TOKEN='{$token}'
API_URL='{$apiUrl}'
AGENT_URL='{$agentDownloadUrl}'
INSTALL_DIR='/opt/server-monitor-agent'
echo '=============================================='
echo ' Установка агента мониторинга серверов'
echo '=============================================='
echo ''
# Проверяем наличие Python3
if ! command -v python3 &> /dev/null; then
echo 'Установка Python3...'
apt-get update
apt-get install -y python3 python3-pip lm-sensors smartmontools
echo '[1/6] Установка Python3...'
apt-get update -qq
apt-get install -y -qq python3 python3-pip || apt-get install -y python3 python3-pip
else
echo '[1/6] Python3 найден'
fi
# Устанавливаем psutil
pip3 install psutil || easy_install3 psutil
# Устанавливаем зависимости (lm-sensors и smartmontools опциональны)
echo '[2/6] Установка зависимостей (psutil, lm-sensors, smartmontools)...'
pip3 install --quiet psutil 2>/dev/null || pip3 install psutil 2>/dev/null || true
apt-get install -y -qq lm-sensors smartmontools 2>/dev/null || true
# Создаем директорию для агента
mkdir -p /opt/server-monitor-agent
cd /opt/server-monitor-agent
echo '[3/6] Создание директории агента...'
mkdir -p "$INSTALL_DIR"
# Скачиваем агента
echo '[4/6] Скачивание агента...'
if ! curl -fsSL "$AGENT_URL" -o "$INSTALL_DIR/agent.py" 2>/dev/null; then
echo 'ERROR: Не удалось скачать агента. Проверьте токен и подключение к серверу.'
exit 1
fi
if ! grep -q 'psutil' "$INSTALL_DIR/agent.py"; then
echo 'ERROR: Скачанный файл не является агентом мониторинга.'
exit 1
fi
chmod +x "$INSTALL_DIR/agent.py"
# Создаем конфигурационный файл
echo '{
\\\"token\\\": \\\"" . $token . "\\\"\\,
\\\"api_url\\\": \\\"" . $apiUrl . "\\\"\\,
\\\"interval_seconds\\\": 60
}' > config.json
# Создаем Python-скрипт агента с поддержкой сервисов
cat > agent.py << 'PYTHON_EOF'
import time
import json
import psutil
import requests
import subprocess
import os
from datetime import datetime
def get_metrics():
\\\"\\\"\\\"Сбор системных метрик\\\"\\\"\\\"
cpu_percent = psutil.cpu_percent(interval=1)
memory = psutil.virtual_memory()
disk_usage = psutil.disk_usage('/')
# Получаем сетевую статистику
try:
net_io = psutil.net_io_counters()
except:
net_io = None
metrics = {
'cpu_load': round(cpu_percent, 2),
'ram_used': round(memory.percent, 2),
'disk_used': round((disk_usage.used / disk_usage.total) * 100, 2),
'network_in': round((net_io.bytes_recv / (1024*1024)) if net_io else 0, 2), # MB
'network_out': round((net_io.bytes_sent / (1024*1024)) if net_io else 0, 2) # MB
}
return metrics
def get_services():
\\\"\\\"\\\"Сбор статусов всех сервисов\\\"\\\"\\\"
services = []
try:
# Получаем список всех сервисов
result = subprocess.run(
['systemctl', 'list-units', '--type=service', '--all', '--no-pager'],
capture_output=True,
text=True,
timeout=30
)
lines = result.stdout.strip().split('\\n')
for line in lines[1:]: # Пропускаем заголовок
parts = line.split()
if len(parts) >= 4:
service_name = parts[0].replace('.service', '')
load_state = parts[1]
active_state = parts[2]
sub_state = parts[3] if len(parts) > 3 else ''
# Определяем статус сервиса
if active_state == 'active' and sub_state == 'running':
status = 'running'
elif active_state in ['inactive', 'failed', 'dead']:
status = 'stopped'
else:
status = 'unknown'
# Пропускаем системные сервисы без .service в имени
if not service_name.startswith('system-'):
services.append({
'name': service_name,
'status': status,
'load_state': load_state,
'active_state': active_state,
'sub_state': sub_state
})
except Exception as e:
print(f'Ошибка при получении списка сервисов: {e}')
return services
def get_config_from_server():
\\\"\\\"\\\"Получение конфигурации с сервера\\\"\\\"\\\"
try:
with open('config.json', 'r') as f:
config = json.load(f)
except Exception as e:
print(f'Ошибка чтения конфига: {e}')
return None
token = config.get('token')
if not token:
print('Отсутствует токен в конфиге')
return None
# Определяем URL для получения конфигурации
server_id = token.split('-')[0] if '-' in token else '1'
try:
response = requests.get(
f\\\"\\\"{config['api_url']}/agent/{server_id}/config\\\"\\\"\\\",
headers={'Authorization': f'Bearer {token}'},
timeout=10
)
if response.status_code == 200:
server_config = response.json()
# Обновляем локальный конфиг
config['interval_seconds'] = server_config.get('interval_seconds', config['interval_seconds'])
config['monitor_services'] = server_config.get('monitor_services', config.get('monitor_services', []))
# Сохраняем обновленный конфиг
with open('config.json', 'w') as f:
json.dump(config, f, indent=2)
return config
else:
print(f'Ошибка получения конфига с сервера: {response.status_code}')
return config
except Exception as e:
print(f'Ошибка подключения к серверу: {e}')
return config
def send_metrics(config, metrics, services):
\\\"\\\"\\\"Отправка метрик и сервисов на сервер\\\"\\\"\\\"
data = {
'token': config['token'],
'metrics': metrics,
'services': services
}
try:
response = requests.post(
config['api_url'],
json=data,
timeout=10
)
if response.status_code == 200:
print(f'{datetime.now().strftime(\\\"%Y-%m-%d %H:%M:%S\\\")} - Метрики отправлены успешно')
return True
else:
print(f'Ошибка отправки метрик: {response.status_code}')
return False
except Exception as e:
print(f'Ошибка отправки метрик: {e}')
return False
def main():
\\\"\\\"\\\"Главная функция агента\\\"\\\"\\\"
print('Агент мониторинга запущен...')
# Загружаем конфигурацию
config = get_config_from_server()
if not config:
print('Не удалось загрузить конфигурацию')
return
interval = config.get('interval_seconds', 60)
monitor_services = config.get('monitor_services', [])
print(f'Интервал отправки: {interval} сек')
print(f'Мониторинг сервисов: {\\\"включен\\\" if monitor_services else \\\"все сервисы\\\"}')
last_config_update = time.time()
while True:
try:
# Проверяем нужно ли обновить конфиг (каждые 5 минут)
if time.time() - last_config_update > 300:
print('Проверка обновления конфигурации...')
config = get_config_from_server()
last_config_update = time.time()
# Обновляем интервал если изменился
interval = config.get('interval_seconds', 60)
monitor_services = config.get('monitor_services', [])
# Собираем метрики
metrics = get_metrics()
# Собираем сервисы
services = get_services()
# Если указаны конкретные сервисы для мониторинга - фильтруем
if monitor_services:
services = [s for s in services if s['name'] in monitor_services]
print(f'Мониторинг {len(services)} сервисов: {[s[\\\"name\\\"] for s in services]}')
# Отправляем данные
success = send_metrics(config, metrics, services)
if success:
print(f'Метрики отправлены: CPU={metrics[\\\"cpu_load\\\"]}%, RAM={metrics[\\\"ram_used\\\"]}%, Disk={metrics[\\\"disk_used\\\"]}%')
else:
print('Ошибка отправки метрик')
# Ждем указанный интервал
time.sleep(interval)
except KeyboardInterrupt:
print('Агент остановлен')
break
except Exception as e:
print(f'Ошибка: {e}')
time.sleep(10)
if __name__ == '__main__':
main()
PYTHON_EOF
echo '[5/6] Создание конфигурации...'
cat > "$INSTALL_DIR/config.json" << CONFIG_EOF
{
"token": "$TOKEN",
"api_url": "$API_URL",
"interval_seconds": 60
}
CONFIG_EOF
# Создаем systemd сервис
cat > /etc/systemd/system/server-monitor-agent.service << 'SERVICE_EOF'
echo '[6/6] Регистрация системной службы...'
cat > /etc/systemd/system/server-monitor-agent.service << SERVICE_EOF
[Unit]
Description=Server Monitor Agent
After=network.target
@ -285,8 +107,8 @@ After=network.target
[Service]
Type=simple
User=root
WorkingDirectory=/opt/server-monitor-agent
ExecStart=/usr/bin/python3 /opt/server-monitor-agent/agent.py
WorkingDirectory=$INSTALL_DIR
ExecStart=/usr/bin/python3 $INSTALL_DIR/agent.py
Restart=always
RestartSec=10
@ -294,22 +116,22 @@ RestartSec=10
WantedBy=multi-user.target
SERVICE_EOF
# Делаем скрипт исполняемым
chmod +x agent.py
# Перезагружаем systemd
# Активируем и запускаем сервис
systemctl daemon-reload
# Включаем автозапуск сервиса
systemctl enable server-monitor-agent
# Запускаем сервис
systemctl stop server-monitor-agent 2>/dev/null || true
systemctl start server-monitor-agent
echo 'Агент мониторинга установлен и запущен!'
echo 'Статус сервиса:'
systemctl status server-monitor-agent
";
echo ''
echo '=============================================='
echo ' Агент мониторинга успешно установлен!'
echo '=============================================='
echo ''
echo 'Директория: $INSTALL_DIR'
echo 'Логи: journalctl -u server-monitor-agent -f'
echo 'Статус: systemctl status server-monitor-agent'
echo ''
BASH;
$response->getBody()->write($script);
return $response
@ -317,6 +139,44 @@ systemctl status server-monitor-agent
->withHeader('Content-Disposition', 'attachment; filename="install.sh"');
}
/**
 * Serve agent.py to a client that presents a valid agent token.
 *
 * The token is read from the "token" query parameter, hashed with SHA-256
 * and looked up in agent_tokens.token_hash. On success the token's
 * last_used_at is updated and the agent source is returned as an attachment.
 *
 * @param Request  $request  Incoming request; only its query parameters are read.
 * @param Response $response Response to populate.
 * @param mixed    $args     Route arguments (unused).
 *
 * @return Response 403 when the token is missing or unknown, 404 when
 *                  agent.py does not exist, 500 when it cannot be read,
 *                  otherwise the file contents with download headers.
 */
public function downloadAgent(Request $request, Response $response, $args)
{
    $queryParams = $request->getQueryParams();
    $token = $queryParams['token'] ?? null;

    // A repeated parameter (?token[]=x) arrives as an array; hash() needs a string.
    if (!is_string($token) || empty($token)) {
        $response->getBody()->write('Token is required');
        return $response->withStatus(403);
    }

    $tokenHash = hash('sha256', $token);
    $stmt = $this->pdo->prepare("SELECT server_id FROM agent_tokens WHERE token_hash = :hash LIMIT 1");
    $stmt->execute([':hash' => $tokenHash]);
    $result = $stmt->fetch();

    if (!$result) {
        $response->getBody()->write('Invalid token');
        return $response->withStatus(403);
    }

    $stmt = $this->pdo->prepare("UPDATE agent_tokens SET last_used_at = NOW() WHERE token_hash = :hash");
    $stmt->execute([':hash' => $tokenHash]);

    $agentPath = dirname(__DIR__, 2) . '/agent.py';
    if (!file_exists($agentPath)) {
        $response->getBody()->write('Agent not found');
        return $response->withStatus(404);
    }

    $content = file_get_contents($agentPath);
    if ($content === false) {
        $response->getBody()->write('Agent could not be read');
        return $response->withStatus(500);
    }

    // write() returns the number of bytes written, not the response, so the
    // headers must be set on $response itself rather than chained onto write().
    $response->getBody()->write($content);

    return $response
        ->withHeader('Content-Type', 'text/plain; charset=UTF-8')
        ->withHeader('Content-Disposition', 'attachment; filename="agent.py"');
}
public function getConfig(Request $request, Response $response, $args)
{
$serverId = $args['id'];

View File

@ -200,8 +200,31 @@ class ServerDetailController extends Model
];
}
// Типы метрик
$stmt = $this->pdo->query("SELECT id, name, unit FROM metric_names WHERE name NOT LIKE '%\_proc' AND name NOT LIKE 'disk_total_gb_%' AND name != 'disk_used' AND name != 'ram_total_gb' AND name NOT IN ('net_in', 'net_out') AND name NOT LIKE 'network_%' ORDER BY name");
// Типы метрик — только те что отображаются на графиках и есть у сервера
$stmt = $this->pdo->prepare("
SELECT DISTINCT mn.id, mn.name, mn.unit
FROM metric_names mn
JOIN server_metrics sm ON sm.metric_name_id = mn.id
WHERE sm.server_id = :id
AND (
mn.name IN ('cpu_load', 'ram_used')
OR mn.name LIKE 'disk_used_%'
OR mn.name LIKE 'net_in_%'
OR mn.name LIKE 'net_out_%'
OR mn.name LIKE 'temp_%'
)
ORDER BY
CASE
WHEN mn.name = 'cpu_load' THEN 1
WHEN mn.name = 'ram_used' THEN 2
WHEN mn.name LIKE 'disk_used_%' THEN 3
WHEN mn.name LIKE 'net_in_%' THEN 4
WHEN mn.name LIKE 'net_out_%' THEN 5
WHEN mn.name LIKE 'temp_%' THEN 6
END,
mn.name
");
$stmt->execute([':id' => $id]);
$allMetricTypes = $stmt->fetchAll();
// Сервисы
@ -286,7 +309,31 @@ class ServerDetailController extends Model
$id = $args['id'];
$params = $request->getParsedBody();
$stmt = $this->pdo->query("SELECT id, name FROM metric_names WHERE name NOT LIKE '%\_proc' AND name NOT LIKE 'disk_total_gb_%' AND name != 'disk_used' AND name != 'ram_total_gb' AND name NOT IN ('net_in', 'net_out') AND name NOT LIKE 'network_%' ORDER BY name");
// Получаем только метрики которые есть у сервера и отображаются на графиках
$stmt = $this->pdo->prepare("
SELECT DISTINCT mn.id, mn.name, mn.unit
FROM metric_names mn
JOIN server_metrics sm ON sm.metric_name_id = mn.id
WHERE sm.server_id = :id
AND (
mn.name IN ('cpu_load', 'ram_used')
OR mn.name LIKE 'disk_used_%'
OR mn.name LIKE 'net_in_%'
OR mn.name LIKE 'net_out_%'
OR mn.name LIKE 'temp_%'
)
ORDER BY
CASE
WHEN mn.name = 'cpu_load' THEN 1
WHEN mn.name = 'ram_used' THEN 2
WHEN mn.name LIKE 'disk_used_%' THEN 3
WHEN mn.name LIKE 'net_in_%' THEN 4
WHEN mn.name LIKE 'net_out_%' THEN 5
WHEN mn.name LIKE 'temp_%' THEN 6
END,
mn.name
");
$stmt->execute([':id' => $id]);
$metricTypes = $stmt->fetchAll();
$stmt = $this->pdo->prepare("DELETE FROM metric_thresholds WHERE server_id = :server_id");

View File

@ -169,27 +169,40 @@
{% endif %}
{% endfor %}
{% for iface in net_interfaces %}
{% if metrics['net_in_' ~ iface] is defined and metrics['net_out_' ~ iface] is defined %}
<div class="row">
<div class="col-12 mb-4">
<div class="card">
<div class="card-header">
<h6 class="mb-0"><i class="fas fa-network-wired"></i> Сеть: {{ iface }}</h6>
</div>
<div class="card-body">
<canvas id="chart-net-{{ iface }}" width="100%" height="200"></canvas>
{% if net_interfaces|length > 0 %}
{% for iface in net_interfaces %}
{% if metrics['net_in_' ~ iface] is defined and metrics['net_out_' ~ iface] is defined %}
<div class="row">
<div class="col-12 mb-4">
<div class="card">
<div class="card-header">
<h6 class="mb-0"><i class="fas fa-network-wired"></i> Сеть: {{ iface }}</h6>
</div>
<div class="card-body">
<canvas id="chart-net-{{ iface }}" width="100%" height="200"></canvas>
</div>
</div>
</div>
</div>
</div>
{% endif %}
{% endfor %}
{% else %}
<div class="alert alert-warning mb-4">
<i class="fas fa-network-wired"></i> Данные о сетевых интерфейсах не получены
</div>
{% endif %}
{% endfor %}
<!-- Температуры: один общий график -->
{% set has_temps = false %}
{% for metricName in metrics|keys %}
{% if metricName starts with 'temp_' %}
{% set has_temps = true %}
{% endif %}
{% endfor %}
<div class="row mb-4">
<div class="col-12">
<div class="card">
@ -197,44 +210,65 @@
<h6 class="mb-0"><i class="fas fa-thermometer-half"></i> Температуры</h6>
</div>
<div class="card-body">
<canvas id="chart-temperatures" width="100%" height="300"></canvas>
{% if has_temps %}
<canvas id="chart-temperatures" width="100%" height="300"></canvas>
{% else %}
<div class="alert alert-info mb-0">
<i class="fas fa-thermometer-half"></i> Температурные датчики недоступны (возможно виртуальный сервер)
</div>
{% endif %}
</div>
</div>
</div>
</div>
<!-- Диски: Doughnut графики -->
{% set has_disk_parts = false %}
{% for metricName in metrics|keys %}
{% if metricName starts with 'disk_used_' and metricName != 'disk_used' %}
{% set has_disk_parts = true %}
{% endif %}
{% endfor %}
<div class="row mb-3">
{% for metricName, metricData in metrics %}
{% if metricName starts with 'disk_used_' and metricName != 'disk_used' %}
<div class="col-md-4 mb-3">
<div class="card h-100">
<div class="card-body text-center">
<h6 class="card-title mb-2">
{% if metricName == 'disk_used_root' %}/ (корень)
{% elseif metricName == 'disk_used_home' %}/home
{% elseif metricName == 'disk_used_boot' %}/boot
{% elseif metricName == 'disk_used_mnt_data' %}/mnt/data
{% else %}{{ metricName|replace({'disk_used_': '', '_': ' '})|title }}
{% endif %}
</h6>
{% set pct = metricData[0].value|round(1) %}
{% set iface = metricName|replace({'disk_used_': ''}) %}
{% set totalGB = metrics['disk_total_gb_' ~ iface][0].value|default(0) %}
{% set usedGB = (pct / 100 * totalGB)|round(1) %}
{% set freeGB = (totalGB - usedGB)|round(1) %}
<div class="mb-1">
<span class="badge bg-success">Свободно: {{ freeGB }} ГБ</span>
<span class="badge bg-danger ms-1">Занято: {{ usedGB }} ГБ</span>
{% if has_disk_parts %}
{% for metricName, metricData in metrics %}
{% if metricName starts with 'disk_used_' and metricName != 'disk_used' %}
<div class="col-md-4 mb-3">
<div class="card h-100">
<div class="card-body text-center">
<h6 class="card-title mb-2">
{% if metricName == 'disk_used_root' %}/ (корень)
{% elseif metricName == 'disk_used_home' %}/home
{% elseif metricName == 'disk_used_boot' %}/boot
{% elseif metricName == 'disk_used_mnt_data' %}/mnt/data
{% else %}{{ metricName|replace({'disk_used_': '', '_': ' '})|title }}
{% endif %}
</h6>
{% set pct = metricData[0].value|round(1) %}
{% set iface = metricName|replace({'disk_used_': ''}) %}
{% set totalGB = metrics['disk_total_gb_' ~ iface][0].value|default(0) %}
{% set usedGB = (pct / 100 * totalGB)|round(1) %}
{% set freeGB = (totalGB - usedGB)|round(1) %}
<div class="mb-1">
<span class="badge bg-success">Свободно: {{ freeGB }} ГБ</span>
<span class="badge bg-danger ms-1">Занято: {{ usedGB }} ГБ</span>
</div>
<p class="text-muted small mb-1">{{ pct }}% из {{ totalGB }} ГБ</p>
<p class="text-muted small">{{ metricData[0].created_at|date('d.m.Y H:i') }}</p>
<div style="max-width: 150px; margin: 0 auto;"><canvas id="chart-{{ metricName }}"></canvas></div>
</div>
<p class="text-muted small mb-1">{{ pct }}% из {{ totalGB }} ГБ</p>
<p class="text-muted small">{{ metricData[0].created_at|date('d.m.Y H:i') }}</p>
<div style="max-width: 150px; margin: 0 auto;"><canvas id="chart-{{ metricName }}"></canvas></div>
</div>
</div>
{% endif %}
{% endfor %}
{% else %}
<div class="col-12">
<div class="alert alert-warning">
<i class="fas fa-hdd"></i> Данные о разделах диска не получены. Проверьте работу агента.
</div>
</div>
{% endif %}
{% endfor %}
{% endif %}
</div>
</div>
@ -412,12 +446,36 @@
<div class="col-12">
<form method="post" action="/servers/{{ server.id }}/thresholds">
{% for metricType in allMetricTypes %}
{% set metricUnit = '%' %}
{% set metricLabel = metricType.name %}
{% if metricType.name starts with 'temp_' %}
{% set metricUnit = '°C' %}
{% set metricLabel = 'Температура ' ~ (metricType.name|replace({'temp_': '', '_': ' '}))|title %}
{% elseif metricType.name == 'cpu_load' %}
{% set metricLabel = 'Загрузка CPU' %}
{% elseif metricType.name == 'ram_used' %}
{% set metricLabel = 'Использование RAM' %}
{% elseif metricType.name starts with 'disk_used_' %}
{% set iface = metricType.name|replace({'disk_used_': ''}) %}
{% if iface == 'root' %}{% set metricLabel = 'Диск (корень /)' %}
{% elseif iface == 'home' %}{% set metricLabel = 'Диск (/home)' %}
{% elseif iface == 'boot' %}{% set metricLabel = 'Диск (/boot)' %}
{% elseif iface == 'mnt_data' %}{% set metricLabel = 'Диск (/mnt/data)' %}
{% else %}{% set metricLabel = 'Диск (/' ~ (iface|replace({'_': '/'})) ~ ')' %}
{% endif %}
{% elseif metricType.name starts with 'net_in_' %}
{% set iface = metricType.name|replace({'net_in_': ''}) %}
{% set metricLabel = 'Сеть входящая (' ~ iface ~ ')' %}
{% elseif metricType.name starts with 'net_out_' %}
{% set iface = metricType.name|replace({'net_out_': ''}) %}
{% set metricLabel = 'Сеть исходящая (' ~ iface ~ ')' %}
{% endif %}
<div class="card mb-2">
<div class="card-body py-2">
<div class="row align-items-center">
<div class="col-md-3 mb-2 mb-md-0">
<strong>{{ metricType.name|replace({'_': ' ', 'load': 'загрузка', 'used': 'использование'})|title }}</strong>
{% if metricType.unit %}<small class="text-muted">({{ metricType.unit }})</small>{% endif %}
<strong>{{ metricLabel }}</strong>
<small class="text-muted">({{ metricUnit }})</small>
</div>
<div class="col-md-3 mb-2 mb-md-0">
<div class="input-group input-group-sm" title="Порог предупреждения">
@ -430,7 +488,7 @@
{% if existingThresholds[metricType.name].warning is defined %}
value="{{ existingThresholds[metricType.name].warning }}"
{% endif %}>
<span class="input-group-text">%</span>
<span class="input-group-text">{{ metricUnit }}</span>
</div>
</div>
<div class="col-md-3 mb-2 mb-md-0">
@ -444,7 +502,7 @@
{% if existingThresholds[metricType.name].critical is defined %}
value="{{ existingThresholds[metricType.name].critical }}"
{% endif %}>
<span class="input-group-text">%</span>
<span class="input-group-text">{{ metricUnit }}</span>
</div>
</div>
<div class="col-md-3">
@ -566,7 +624,7 @@ document.addEventListener('DOMContentLoaded', function() {
});
// Параметры системы
var ramTotalGB = {{ metrics['ram_total_gb'] is defined ? metrics['ram_total_gb'][0].value : 0 }};
var ramTotalGB = {{ metrics['ram_total_gb'] is defined ? metrics['ram_total_gb'][0].value : 'null' }};
var diskTotalGB = {
{% for m, _data in metrics %}
{% if m starts with 'disk_total_gb_' %}
@ -664,12 +722,18 @@ const chart{{ metricName|replace({'-': '_', '.': '_'}) }} = new Chart(ctx{{ metr
lines.push('Время: ' + time);
{% if metricName == 'ram_used' %}
var ramPct = data{{ metricName }}[dataIndex];
var ramUsed = (ramPct / 100 * ramTotalGB).toFixed(1);
var ramFree = (ramTotalGB - ramUsed).toFixed(1);
lines.push('Всего: ' + ramTotalGB.toFixed(1) + ' ГБ');
lines.push('Занято: ' + ramUsed + ' ГБ');
lines.push('Свободно: ' + ramFree + ' ГБ');
lines.push('');
if (ramTotalGB !== null) {
var ramUsed = (ramPct / 100 * ramTotalGB).toFixed(1);
var ramFree = (ramTotalGB - ramUsed).toFixed(1);
lines.push('Всего: ' + ramTotalGB.toFixed(1) + ' ГБ');
lines.push('Занято: ' + ramUsed + ' ГБ');
lines.push('Свободно: ' + ramFree + ' ГБ');
lines.push('');
} else {
lines.push('RAM: ' + ramPct + '%');
lines.push('(данные о памяти недоступны)');
lines.push('');
}
if (data.top_ram && data.top_ram.length > 0) {
lines.push('TOP RAM:');
data.top_ram.forEach(function(proc) {