mirror of
https://github.com/inventree/InvenTree.git
synced 2026-07-04 14:10:52 +00:00
System Health Checks (#12193)
* Add worker health check invoke task * Increase frequency of heartbeat task * Adjust default threshold for worker health check * Add server_health invoke func
This commit is contained in:
@@ -1603,6 +1603,67 @@ def worker(c, verbose: bool = False):
|
||||
manage(c, 'qcluster', pty=True, verbose=verbose)
|
||||
|
||||
|
||||
@task(help={'timeout': 'Maximum minutes since last heartbeat (default: 3)'})
def worker_health(c, timeout: int = 3):
    """Check if the background worker is healthy by reading the heartbeat file.

    Exits 0 if the worker has run within the last TIMEOUT minutes, 1 otherwise.
    No Django startup or database access is required.

    The heartbeat file lives in the system temp directory and its content is
    interpreted as a single UNIX timestamp (seconds since the epoch).

    Args:
        c: Invoke context (unused).
        timeout: Maximum allowed age of the last heartbeat, in minutes.

    Raises:
        Exit: With code 1 if the heartbeat file is missing, unreadable,
            does not contain a valid timestamp, or is older than the timeout.
    """
    import math

    heartbeat_file = Path(tempfile.gettempdir()) / 'inventree_worker_heartbeat'

    # Read the file directly rather than testing exists() first: the file may
    # disappear between the check and the read, and a missing file should be
    # reported as "not found" either way.
    try:
        last_heartbeat = float(heartbeat_file.read_text().strip())
    except FileNotFoundError:
        warning(f'Heartbeat file not found: {heartbeat_file}')
    except (OSError, ValueError) as e:
        # OSError: permissions, path is a directory, ...
        # ValueError: empty / non-numeric / undecodable content
        warning(f'Could not read heartbeat file: {e}')
    else:
        if not math.isfinite(last_heartbeat):
            # float() happily parses 'nan' and 'inf'; neither is a usable timestamp
            warning(
                f'Could not read heartbeat file: invalid timestamp {last_heartbeat!r}'
            )
        else:
            age_seconds = time.time() - last_heartbeat

            # A clock adjustment can leave the timestamp slightly in the future.
            # Clamp the *displayed* age at zero so it never renders as a
            # negative duration; the comparison below uses the raw value.
            minutes, seconds = divmod(max(0, int(age_seconds)), 60)

            if age_seconds < timeout * 60:
                success(
                    f'Worker is healthy (last heartbeat {minutes}m {seconds}s ago)'
                )
                return

            warning(f'Heartbeat file is stale ({minutes}m {seconds}s old)')

    error('Worker health check failed')
    raise Exit(code=1)
|
||||
|
||||
|
||||
@task(
    help={
        'address': 'Server address to check (default: http://localhost:8000)',
        'timeout': 'Request timeout in seconds (default: 5)',
    }
)
def server_health(c, address: str = 'http://localhost:8000', timeout: int = 5):
    """Check if the web server is healthy by requesting /api/system/health/.

    Exits 0 on HTTP 200, 1 otherwise.
    No Django startup required.

    Args:
        c: Invoke context (unused).
        address: Base address of the server; a trailing slash is tolerated.
        timeout: Timeout for the HTTP request, in seconds.

    Raises:
        Exit: With code 1 if the server cannot be reached, the request fails,
            or the response status is anything other than 200.
    """
    import urllib.error
    import urllib.request

    url = f'{address.rstrip("/")}/api/system/health/'

    try:
        with urllib.request.urlopen(url, timeout=timeout) as response:
            if response.status == 200:
                success(f'Server is healthy ({url})')
                return

            # Only reachable for non-200 success codes: urlopen raises
            # HTTPError for 4xx / 5xx responses (handled below).
            warning(f'Unexpected status {response.status} from {url}')
    except urllib.error.HTTPError as e:
        # The server was reached but answered with an error status.
        # HTTPError subclasses URLError, so it must be caught first -
        # otherwise this case is misreported as "could not reach server".
        warning(f'Unexpected status {e.code} from {url}')
    except urllib.error.URLError as e:
        # Connection-level failure (refused, DNS failure, connect timeout, ...)
        warning(f'Could not reach server at {url}: {e.reason}')
    except Exception as e:
        # Deliberate catch-all: any other failure (malformed URL, read
        # timeout, ...) must still end in a clean "unhealthy" exit.
        warning(f'Unexpected error checking {url}: {e}')

    error('Server health check failed')
    raise Exit(code=1)
|
||||
|
||||
|
||||
@task(post=[static, server])
|
||||
def test_translations(c):
|
||||
"""Add a fictional language to test if each component is ready for translations."""
|
||||
@@ -2457,6 +2518,8 @@ ns = Collection(
|
||||
version,
|
||||
wait,
|
||||
worker,
|
||||
worker_health,
|
||||
server_health,
|
||||
monitor,
|
||||
build_docs,
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user