feat(healthchecks): ping services periodically to check for availability + post results to Discord

This commit is contained in:
Marc 2024-03-07 22:21:57 -05:00
parent ded4fe92b0
commit 9bbd0364d0
Signed by: marc
GPG key ID: 048E042F22B5DC79
22 changed files with 726 additions and 0 deletions

8
.gitignore vendored
View file

@ -2,3 +2,11 @@
env.yml
# Taskfile binary via bootstrap.sh
bin
**/config.json
# Python artifacts
**pycache**
**/*.pyc
**/.venv/**
**.egg-info**

View file

@ -0,0 +1 @@
**/*_test.py

View file

@ -0,0 +1 @@
3.12

View file

@ -0,0 +1,15 @@
# Dependency stage: requirements are installed before the application code is
# copied, so this layer is only rebuilt when requirements.txt changes.
FROM python:3.12 AS base
WORKDIR /app
COPY requirements.txt .
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir -r requirements.txt

# Application stage: source code plus runtime settings.
FROM base AS app
# Location the service reads its JSON configuration from.
ENV HEALTHCHECK_CONFIG_PATH="/app/config.json"
COPY healthcheck ./healthcheck
# Exec form runs uvicorn directly instead of through an intermediate shell, so
# stop signals sent to the container reach the server process.
CMD ["python", "-m", "uvicorn", "--host", "0.0.0.0", "healthcheck.main:app"]

View file

@ -0,0 +1,21 @@
# Healthcheck reporter
Periodically checks if resources are reachable and reports via a configurable webhook.
## Configuration
A `config.json` file should be provided and follow the schema outlined in `use_cases.Configuration`:
```json
{
"endpoints": {
"service-a": "https://service-a.com",
"service-b": "http://service-b:8080",
...
},
"webhook_url": "https://my-webhook.com/",
"check_interval": 3600
}
```
Every `check_interval` seconds, the application will attempt to reach each of the services and post a message summarizing the results to `webhook_url`.

5
services/healthcheck/build.sh Executable file
View file

@ -0,0 +1,5 @@
#!/usr/bin/bash
# Builds the healthcheck container image with podman.
#
# Expects IMAGE_VERSION in the environment; the image name comes from
# constants.sh, which is sourced from the current directory.

# Stop at the first failing command (for example a missing constants.sh)
# instead of building with empty names.
set -e

source ./constants.sh

# An empty tag would yield an invalid "<name>:" image reference.
: "${IMAGE_VERSION:?IMAGE_VERSION must be set}"

podman build . -t "${APP_IMAGE_NAME}:${IMAGE_VERSION}"

View file

@ -0,0 +1,5 @@
#!/bin/bash
# Naming constants shared by the healthcheck service scripts.
# Every value is exported; CONTAINER_NAME_PREFIX is taken from the
# environment of whoever sources this file.

export APP_NAME="healthcheck"
export APP_CONTAINER_NAME="${APP_NAME}-app"
export APP_IMAGE_NAME="${CONTAINER_NAME_PREFIX}-${APP_CONTAINER_NAME}"

View file

@ -0,0 +1,42 @@
"""
Shared fixtures.
"""
import json
import pytest
@pytest.fixture
def anyio_backend():
    """Select asyncio as the backend for anyio-marked tests."""
    backend_name = "asyncio"
    return backend_name
@pytest.fixture(name="mock_configuration")
def f_mock_configuration():
    """Provide a minimal configuration mapping with a single endpoint."""
    sample_endpoints = {"test-service": "http://test.local"}
    return dict(
        endpoints=sample_endpoints,
        webhook_url="http://webhook.local",
        check_interval=0.1,
    )
@pytest.fixture(name="set_up_mock_configuration")
def f_set_up_mock_configuration(monkeypatch, tmp_path, mock_configuration):
    """
    Return a callable that writes the sample configuration to a temporary
    config.json and points HEALTHCHECK_CONFIG_PATH at that file.

    The callable returns the path of the file it wrote.
    """

    def _write_and_register():
        target = tmp_path / "config.json"
        serialized = json.dumps(mock_configuration)
        target.write_text(serialized)
        monkeypatch.setenv("HEALTHCHECK_CONFIG_PATH", str(target))
        return target

    return _write_and_register

View file

@ -0,0 +1,55 @@
"""
Healthcheck reporting service.
This periodically checks if certain URLs respond and
reports on those responses.
"""
import asyncio
import contextlib
import logging
import fastapi
from healthcheck.tasks import report_on_statuses
from healthcheck.use_cases import check_all_statuses, load_configuration
# Configure logging at import time so INFO-level messages are emitted.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Registry of the tasks started by the lifespan handler; it is iterated on
# shutdown to cancel them.
background_tasks = set()
@contextlib.asynccontextmanager
async def lifespan(_):
    """
    Start the reporting loop on application startup and stop it on shutdown.

    The single argument supplied by the framework is unused. Code before the
    ``yield`` runs on startup; the ``finally`` clause runs on shutdown, even
    when the application exits because of an error.
    """
    status_checks = asyncio.create_task(report_on_statuses())
    background_tasks.add(status_checks)
    logger.info("Started reporting loop.")
    try:
        yield
    finally:
        # Snapshot the registry: it is cleared below and must not change size
        # while being iterated.
        pending = tuple(background_tasks)
        for task in pending:
            task.cancel()
        # Wait until the cancellations have been processed so no task is left
        # pending when the event loop closes. return_exceptions keeps the
        # resulting CancelledError (or a task's own failure) from propagating.
        await asyncio.gather(*pending, return_exceptions=True)
        background_tasks.clear()
# Application object; the lifespan handler starts and stops the reporting task.
app = fastapi.FastAPI(lifespan=lifespan)
@app.get("/")
def alive():
    """Liveness probe: answers with the value 200 whenever the app is up."""
    ok_payload = 200
    return ok_payload
@app.get("/config")
def configuration():
    """Expose the currently loaded configuration as a JSON document."""
    loaded = load_configuration()
    payload = loaded.model_dump()
    return fastapi.responses.JSONResponse(payload)
@app.get("/status")
def check_status():
    """Probe every configured endpoint and return the name-to-health mapping."""
    endpoints = load_configuration().endpoints
    return check_all_statuses(endpoints)

View file

@ -0,0 +1,52 @@
"""
Test coverage for endpoints and use cases.
"""
import pytest
from fastapi.testclient import TestClient
from healthcheck.main import app
@pytest.fixture(name="client")
def f_client():
    """Provide a TestClient bound to the healthcheck application."""
    test_client = TestClient(app)
    return test_client
def test_alive_check_returns_200(client):
    """The root endpoint answers with HTTP 200."""
    status = client.get("/").status_code
    assert status == 200
def test_check_configuration_returns_config_and_200(
    client, set_up_mock_configuration, mock_configuration
):
    """The /config endpoint answers 200 with the loaded configuration as body."""
    set_up_mock_configuration()

    response = client.get("/config")

    # The test name promises the status code as well, so check it explicitly.
    assert response.status_code == 200
    assert response.json() == mock_configuration
def test_check_status_returns_status_summary(
    httpx_mock, client, set_up_mock_configuration
):
    """
    The /status endpoint probes each configured endpoint and returns a
    mapping of service name to availability.
    """
    set_up_mock_configuration()
    httpx_mock.add_response(url="http://test.local", status_code=200)

    status_response = client.get("/status")

    assert status_response.status_code == 200
    summary = status_response.json()
    assert summary == {"test-service": True}

View file

@ -0,0 +1,41 @@
"""
Defines asynchronous tasks loaded on startup.
"""
import asyncio
import logging
import time
import typing

from healthcheck.use_cases import check_all_statuses, load_configuration, post_message
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
async def report_on_statuses(*, max_iterations: typing.Optional[int] = None):
    """
    Periodically check every configured endpoint and post a summary.

    Each iteration checks all endpoints from the loaded configuration, builds
    one line per service, logs the resulting message, posts it to the
    configured webhook URL, then waits ``check_interval`` seconds.

    The HTTP helpers are synchronous, so they run in worker threads and the
    wait uses ``asyncio.sleep``. A blocking call here would stall the event
    loop this coroutine shares with the web application.

    Args:
        max_iterations: Number of iterations to run before returning. When
            ``None`` (the default) the loop runs until the task is cancelled.

    Raises:
        Whatever ``load_configuration`` or ``post_message`` raise; a failed
        webhook post ends the loop.
    """
    config = load_configuration()
    iterations = 0

    while max_iterations is None or iterations < max_iterations:
        statuses = await asyncio.to_thread(check_all_statuses, config.endpoints)

        message = "\n".join(
            (
                f"{service} is healthy."
                if healthy
                else f"🔥 {service} is not responding normally."
            )
            for service, healthy in statuses.items()
        )

        logger.info(message)
        await asyncio.to_thread(post_message, config.webhook_url, message)

        iterations += 1
        await asyncio.sleep(config.check_interval)

View file

@ -0,0 +1,113 @@
"""
Asynchronous task tests.
"""
import json
import pytest
from healthcheck.tasks import report_on_statuses
# Apply the anyio marker to every test in this module.
pytestmark = pytest.mark.anyio
async def test_report_on_statuses_pings_registered_endpoints(
    httpx_mock, mock_configuration, set_up_mock_configuration
):
    """A single iteration issues exactly one GET, aimed at the configured service."""
    service_url = mock_configuration["endpoints"]["test-service"]
    webhook_url = mock_configuration["webhook_url"]
    set_up_mock_configuration()
    httpx_mock.add_response(url=service_url)
    httpx_mock.add_response(url=webhook_url)

    await report_on_statuses(max_iterations=1)

    pings = [
        captured for captured in httpx_mock.get_requests() if captured.method == "GET"
    ]
    assert len(pings) == 1
    assert str(pings[0].url) == service_url
async def test_report_on_statuses_posts_message_to_the_webhook_url(
    httpx_mock, mock_configuration, set_up_mock_configuration
):
    """A single iteration sends exactly one POST, addressed to the webhook URL."""
    service_url = mock_configuration["endpoints"]["test-service"]
    webhook_url = mock_configuration["webhook_url"]
    set_up_mock_configuration()
    httpx_mock.add_response(url=service_url)
    httpx_mock.add_response(url=webhook_url)

    await report_on_statuses(max_iterations=1)

    posts = [sent for sent in httpx_mock.get_requests() if sent.method == "POST"]

    # One webhook call per iteration, no more.
    assert len(posts) == 1
    # It must target the configured webhook.
    assert str(posts[0].url) == webhook_url
@pytest.mark.parametrize(
    "status_code, expected",
    [[200, "is healthy"], [400, "is not responding normally"]],
    ids=["healthy", "unhealthy"],
)
async def test_report_on_statuses_posts_message_describing_service_status(
    status_code, expected, httpx_mock, mock_configuration, set_up_mock_configuration
):
    """The posted message states whether the service responded successfully."""
    service_url = mock_configuration["endpoints"]["test-service"]
    webhook_url = mock_configuration["webhook_url"]
    set_up_mock_configuration()
    httpx_mock.add_response(url=service_url, status_code=status_code)
    httpx_mock.add_response(url=webhook_url)

    await report_on_statuses(max_iterations=1)

    posts = [sent for sent in httpx_mock.get_requests() if sent.method == "POST"]
    body = json.loads(posts[0].content)
    assert expected in body["content"]
@pytest.mark.parametrize("iterations", [1, 10])
async def test_report_on_statuses_runs_at_most_n_times_if_max_iterations_specified(
    iterations, httpx_mock, mock_configuration, set_up_mock_configuration
):
    """The loop stops after the requested number of iterations."""
    service_url = mock_configuration["endpoints"]["test-service"]
    webhook_url = mock_configuration["webhook_url"]
    set_up_mock_configuration()
    httpx_mock.add_response(url=service_url)
    httpx_mock.add_response(url=webhook_url)

    await report_on_statuses(max_iterations=iterations)

    # One ping plus one webhook post per iteration.
    expected_request_count = 2 * iterations
    assert len(httpx_mock.get_requests()) == expected_request_count

View file

@ -0,0 +1,97 @@
"""
Business logic for endpoints and tasks.
"""
import functools
import json
import logging
import os
import pathlib
import httpx
import pydantic
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
class Configuration(pydantic.BaseModel):
    """Service configuration, as read from the JSON configuration file."""

    # Maps a service name to the URL that is requested to check it.
    endpoints: dict[str, str]
    # URL that status messages are posted to.
    webhook_url: str
    # Delay between two rounds of checks; the README documents it as seconds.
    check_interval: float
@functools.cache
def load_configuration() -> Configuration:
    """
    Load the service configuration from the JSON file named by the
    HEALTHCHECK_CONFIG_PATH environment variable.

    The result is cached: after the first successful call, later calls return
    the same Configuration without reading the file again.

    Returns:
        The validated Configuration.

    Raises:
        RuntimeError: if HEALTHCHECK_CONFIG_PATH is unset or empty, if the
            file does not exist, or if its content is not valid JSON.
        TypeError: if the top-level JSON value is not an object.
        pydantic.ValidationError: if the parsed data does not satisfy the
            Configuration model.
    """
    raw_config_path = os.getenv("HEALTHCHECK_CONFIG_PATH")

    if not raw_config_path:
        raise RuntimeError(
            "No configuration path provided. HEALTHCHECK_CONFIG_PATH must be set."
        )

    config_path = pathlib.Path(raw_config_path)

    if not config_path.exists():
        raise RuntimeError(f"Configuration file does not exist at {config_path}")

    config_raw = config_path.read_text(encoding="utf8")

    try:
        config = json.loads(config_raw)
    except json.JSONDecodeError as e:
        # The f prefix is required: without it the placeholders are emitted
        # verbatim and the message names neither the file nor the cause.
        raise RuntimeError(
            f"Failed to parse configuration file at {config_path}: {e}"
        ) from e

    return Configuration(**config)
def check_all_statuses(endpoints: dict[str, str]) -> dict[str, bool]:
    """
    Send a GET request to each endpoint and record whether it succeeded.

    Args:
        endpoints: mapping of service name to the URL to request.

    Returns:
        Mapping of service name to True when the request completed and
        raise_for_status() did not raise, False otherwise.

    Any exception raised while requesting is logged with its traceback and
    counted as a failed check; it is not propagated.
    """
    results = {}

    for name, url in endpoints.items():
        healthy = True
        try:
            httpx.get(url).raise_for_status()
        except Exception:  # pylint: disable=broad-except
            logger.exception("Failed to check health of %s: (%s)", name, url)
            healthy = False
        results[name] = healthy

    return results
def post_message(webhook_url: str, message: str):
    """
    Send ``message`` as the ``content`` field of a JSON POST to a Discord
    webhook URL.

    Payload schema:
    https://discord.com/developers/docs/resources/webhook#execute-webhook

    Propagates the error raised by raise_for_status() when the webhook does
    not answer with a success status.
    """
    httpx.post(webhook_url, json={"content": message}).raise_for_status()

View file

@ -0,0 +1,14 @@
"""
Business logic tests.
"""
from healthcheck.use_cases import load_configuration
def test_load_configuration(set_up_mock_configuration, mock_configuration):
    """The configuration written to disk is loaded back unchanged."""
    set_up_mock_configuration()

    loaded = load_configuration().model_dump()

    assert loaded == mock_configuration

View file

@ -0,0 +1,34 @@
[project]
name = "healthcheck"
version = "0.0.0"
requires-python = ">= 3.12"
dependencies = [
"fastapi",
"httpx",
"pydantic",
"uvicorn[standard]",
]
[project.optional-dependencies]
dev = [
"anyio",
"black",
"pylint",
"httpx",
"pytest",
"pytest-httpx",
"isort",
]
[tool.setuptools]
packages = ["healthcheck"]
[tool.pytest.ini_options]
pythonpath=[
".",
"./healthcheck",
]
python_files=[
"*_test.py"
]

View file

@ -0,0 +1,58 @@
annotated-types==0.6.0
# via pydantic
anyio==4.3.0
# via
# httpx
# starlette
# watchfiles
certifi==2024.2.2
# via
# httpcore
# httpx
click==8.1.7
# via uvicorn
fastapi==0.110.0
# via healthcheck (pyproject.toml)
h11==0.14.0
# via
# httpcore
# uvicorn
httpcore==1.0.4
# via httpx
httptools==0.6.1
# via uvicorn
httpx==0.27.0
# via healthcheck (pyproject.toml)
idna==3.6
# via
# anyio
# httpx
pydantic==2.6.3
# via
# fastapi
# healthcheck (pyproject.toml)
pydantic-core==2.16.3
# via pydantic
python-dotenv==1.0.1
# via uvicorn
pyyaml==6.0.1
# via uvicorn
sniffio==1.3.1
# via
# anyio
# httpx
starlette==0.36.3
# via fastapi
typing-extensions==4.10.0
# via
# fastapi
# pydantic
# pydantic-core
uvicorn[standard]==0.27.1
# via healthcheck (pyproject.toml)
uvloop==0.19.0
# via uvicorn
watchfiles==0.21.0
# via uvicorn
websockets==12.0
# via uvicorn

View file

@ -0,0 +1,134 @@
annotated-types==0.6.0
# via
# -c requirements.txt
# pydantic
anyio==4.3.0
# via
# -c requirements.txt
# healthcheck (pyproject.toml)
# httpx
# starlette
# watchfiles
astroid==3.1.0
# via pylint
black==24.2.0
# via healthcheck (pyproject.toml)
certifi==2024.2.2
# via
# -c requirements.txt
# httpcore
# httpx
click==8.1.7
# via
# -c requirements.txt
# black
# uvicorn
dill==0.3.8
# via pylint
fastapi==0.110.0
# via
# -c requirements.txt
# healthcheck (pyproject.toml)
h11==0.14.0
# via
# -c requirements.txt
# httpcore
# uvicorn
httpcore==1.0.4
# via
# -c requirements.txt
# httpx
httptools==0.6.1
# via
# -c requirements.txt
# uvicorn
httpx==0.27.0
# via
# -c requirements.txt
# healthcheck (pyproject.toml)
# pytest-httpx
idna==3.6
# via
# -c requirements.txt
# anyio
# httpx
iniconfig==2.0.0
# via pytest
isort==5.13.2
# via
# healthcheck (pyproject.toml)
# pylint
mccabe==0.7.0
# via pylint
mypy-extensions==1.0.0
# via black
packaging==23.2
# via
# black
# pytest
pathspec==0.12.1
# via black
platformdirs==4.2.0
# via
# black
# pylint
pluggy==1.4.0
# via pytest
pydantic==2.6.3
# via
# -c requirements.txt
# fastapi
# healthcheck (pyproject.toml)
pydantic-core==2.16.3
# via
# -c requirements.txt
# pydantic
pylint==3.1.0
# via healthcheck (pyproject.toml)
pytest==8.0.2
# via
# healthcheck (pyproject.toml)
# pytest-httpx
pytest-httpx==0.30.0
# via healthcheck (pyproject.toml)
python-dotenv==1.0.1
# via
# -c requirements.txt
# uvicorn
pyyaml==6.0.1
# via
# -c requirements.txt
# uvicorn
sniffio==1.3.1
# via
# -c requirements.txt
# anyio
# httpx
starlette==0.36.3
# via
# -c requirements.txt
# fastapi
tomlkit==0.12.4
# via pylint
typing-extensions==4.10.0
# via
# -c requirements.txt
# fastapi
# pydantic
# pydantic-core
uvicorn[standard]==0.27.1
# via
# -c requirements.txt
# healthcheck (pyproject.toml)
uvloop==0.19.0
# via
# -c requirements.txt
# uvicorn
watchfiles==0.21.0
# via
# -c requirements.txt
# uvicorn
websockets==12.0
# via
# -c requirements.txt
# uvicorn

View file

@ -0,0 +1,9 @@
#!/usr/bin/bash
# Creates the local virtual environment and installs the pinned dependencies.

# Abort on the first failure: if the venv cannot be created or activated, the
# pip commands below would otherwise run against whichever python is on PATH.
set -e

python -m venv .venv
. .venv/bin/activate
# pip-tools provides the pip-sync command used below.
pip install -U pip~=24.0 pip-tools~=7.3.0
# Install from both lock files (runtime and dev).
pip-sync requirements.txt requirements_dev.txt

View file

@ -0,0 +1,6 @@
#!/usr/bin/bash
# Regenerates both pinned requirement files from pyproject.toml using pip-tools.
PYTHON=.venv/bin/python
# Runtime pins are compiled first; the dev pins (with the "dev" extra) are only
# compiled if that succeeds, using the runtime pins as constraints.
$PYTHON -m piptools compile -o requirements.txt pyproject.toml --no-header \
&& $PYTHON -m piptools compile -o requirements_dev.txt --no-header --extra dev --constraint requirements.txt pyproject.toml

10
services/healthcheck/start.sh Executable file
View file

@ -0,0 +1,10 @@
#!/usr/bin/bash
# Starts the healthcheck container, detached, in the "services" pod.
#
# Expects IMAGE_VERSION in the environment and a config.json in the current
# directory; the file is mounted at /app/config.json inside the container.

# Stop at the first failing command (for example a missing constants.sh).
set -e

source ./constants.sh

# An empty tag would yield an invalid "<name>:" image reference.
: "${IMAGE_VERSION:?IMAGE_VERSION must be set}"

podman run \
    --detach \
    --pod services \
    -v ./config.json:/app/config.json \
    --name "$APP_NAME" \
    "${APP_IMAGE_NAME}:${IMAGE_VERSION}"

View file

@ -0,0 +1,5 @@
#!/usr/bin/bash
# Force-removes the container named by APP_NAME (defined in constants.sh).
source ./constants.sh
# Quoted so the name is always passed as a single argument.
podman rm -f "$APP_NAME"