Add infrastructure standards and deployment conventions

This commit is contained in:
Oskar Kapala 2026-05-07 21:16:03 +02:00
parent 2b5d59ae27
commit d0540f7eb8
20 changed files with 307 additions and 116 deletions

29
.gitignore vendored
View file

@ -1,2 +1,31 @@
# Environment variables
# `.env.*` covers host-local variants such as `.env.local`, which must never
# be committed; the negation below re-includes example templates.
.env
*.env
.env.*
!*.env.example

# IDE artifacts
.idea/
.vscode/
*.swp
*.swo

# Python
__pycache__/
*.py[cod]
*$py.class
venv/
.venv/

# Tools
.aider*
.codex

# OS files
.DS_Store
Thumbs.db

# Runtime data (should live outside the repo; ignored here as a safety net)
data/
logs/
temp/
tmp/

10
.idea/.gitignore vendored
View file

@ -1,10 +0,0 @@
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Ignored default folder with query files
/queries/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml

View file

@ -1,5 +0,0 @@
<component name="ProjectCodeStyleConfiguration">
<state>
<option name="PREFERRED_PROJECT_CODE_STYLE" value="Default" />
</state>
</component>

View file

@ -1,9 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="JAVA_MODULE" version="4">
<component name="NewModuleRootManager" inherit-compiler-output="true">
<exclude-output />
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

View file

@ -1,7 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="KubernetesApiProvider"><![CDATA[{}]]></component>
<component name="ProjectRootManager" version="2" languageLevel="JDK_23" default="true" project-jdk-name="23" project-jdk-type="JavaSDK">
<output url="file://$PROJECT_DIR$/out" />
</component>
</project>

View file

@ -1,8 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/homelab-codex-ws.iml" filepath="$PROJECT_DIR$/.idea/homelab-codex-ws.iml" />
</modules>
</component>
</project>

View file

@ -1,6 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="" vcs="Git" />
</component>
</project>

View file

@ -1,85 +1,38 @@
# Homelab Current State
# Homelab Codex
## Description
GitOps-lite orchestration for a distributed homelab environment.
This repository documents the current known state of the homelab.
## Architecture
The documentation is based only on stated facts. Missing details are recorded as unknowns and need clarification.
The homelab consists of several nodes connected via a Tailscale internal mesh.
## Shared context sync lock
| Host | Role | Description |
|------|------|-------------|
| **SATURN** | Primary Node | Development, orchestration, and git source of truth (commit node). |
| **SOLARIA** | Compute Node | GPU, inference, and heavy compute workloads. |
| **PIHA** | Infra Node | Core infrastructure services, automation, and monitoring. |
| **VPS** | Edge Node | Public ingress, reverse proxy, and edge services. |
`sync-context.sh` uses a git-tracked `.context.lock` file to serialize updates to `codex_context.yaml`.
## Repository Structure
If `codex_context.yaml` has changes, the script:
- `docs/`: [Infrastructure Standards](docs/standards.md) and [Deployment Conventions](docs/deployment.md).
- `hosts/`: Host-specific configurations and service assignments.
- `services/`: Reusable Docker Compose service definitions.
- `scripts/`: Deployment and management scripts.
1. pulls with rebase
2. aborts if `.context.lock` already exists and prints its contents
3. creates `.context.lock` with `hostname`, `username`, and UTC `timestamp`
4. commits and pushes the lock with message `lock shared context`
5. validates `codex_context.yaml`, commits it, and pushes
6. removes `.context.lock`, commits `unlock shared context`, and pushes
## Getting Started
If any step fails after lock creation, the script prints `Lock may need manual cleanup` and leaves the lock in place.
1. **Standardization**: Follow the [Infrastructure Standards](docs/standards.md).
2. **Deployment**: See [Deployment Conventions](docs/deployment.md) for how to roll out changes.
3. **SATURN**: Remember that SATURN is the only node where commits should be made.
Manual cleanup:
1. inspect `.context.lock`
2. confirm the owning host/user is no longer updating context
3. remove the file
4. commit `unlock shared context`
5. push
## Current configuration
- Main server hardware: Raspberry Pi 5
- Core stack:
- Docker
- Portainer
- Nginx Proxy Manager
- Network position: behind NAT
- Public access path: Nginx Proxy Manager with HTTPS using Let's Encrypt
- Private access path: Tailscale
- Known port forwarding:
- External ports `80-81` forward to internal ports `4480-4481`
- External port `443` forwards to internal port `4443`
## Documentation index
## Documentation Index
- [Infrastructure Standards](docs/standards.md)
- [Deployment Conventions](docs/deployment.md)
- [Hardware](docs/hardware.md)
- [Core stack](docs/core-stack.md)
- [Networking](docs/networking.md)
- [Access](docs/access.md)
- [Services](docs/services.md)
- [Hetzner VPS](docs/hetzner-vps.md)
- [Joplin Server](docs/joplin-server.md)
- [Unknowns and clarification questions](docs/questions.md)
## Known facts
- The homelab has one known main server: Raspberry Pi 5.
- Docker is part of the current stack.
- Portainer is part of the current stack.
- Nginx Proxy Manager is part of the current stack.
- The homelab is behind NAT.
- Public services are exposed through Nginx Proxy Manager with HTTPS certificates from Let's Encrypt.
- Private access is provided through Tailscale.
- A Hetzner VPS handoff has been received from another Codex session.
- The Hetzner VPS hostname is `ubuntu-4gb-hel1-1`.
- The Hetzner VPS Tailscale IP is `100.95.58.48`.
- `100.108.208.3` is explicitly not the Hetzner VPS Tailscale IP.
- Nginx Proxy Manager is running on the Hetzner VPS as container `npm`.
- Joplin Server files exist on the Hetzner VPS, but Joplin is not running yet.
## Unknown / needs clarification
- Operating system and version on the Raspberry Pi 5.
- Storage layout and attached disks.
- Network interface configuration.
- LAN IP addresses.
- Public domain names for the Raspberry Pi 5 services.
- List of all running containers.
- Exact Nginx Proxy Manager proxy host configuration.
- Tailscale tailnet, device name, and subnet/exit-node configuration if any.
- Backup configuration.
- Monitoring and alerting configuration.
- Whether the Hetzner VPS is part of the homelab, a separate public edge, or both.
---
*Note: This repository documents the state of the homelab. Runtime state lives outside the repository in `/opt/homelab`.*

40
docs/deployment.md Normal file
View file

@ -0,0 +1,40 @@
# Deployment Conventions
This document describes the GitOps-lite deployment process for the homelab.
## Principles
1. **Git as Source of Truth**: All infrastructure definitions (Docker Compose, configurations) are stored in Git.
2. **Unidirectional Flow**: Changes flow from **SATURN** (commit node) to execution nodes.
3. **Lightweight**: No complex orchestrators (no Kubernetes). Use `docker compose` and simple shell scripts.
4. **Tailscale Mesh**: All hosts are connected via Tailscale, allowing secure communication without public port exposure.
## Deployment Process
### 1. Preparation (on SATURN)
- Modify or create service definitions in `services/`.
- Assign services to hosts by creating/updating `hosts/<hostname>/services.txt` (or similar mapping), using the lowercase hostname as the directory name and listing one service name per line.
- Commit and push changes to the Forgejo instance.
### 2. Deployment (on Execution Node)
Execution nodes run a deployment script (e.g., via cron or manual trigger) that:
1. Performs a `git pull` from the source of truth.
2. Identifies services assigned to this host.
3. Symlinks or copies `services/<service>/docker-compose.yml` to `/opt/homelab/services/`.
4. Runs `docker compose up -d --remove-orphans`.
## Host-Local Overrides
If a service requires host-specific configuration (e.g., unique device paths for GPUs on SOLARIA):
1. Create a `docker-compose.override.yml` in `/opt/homelab/config/<service>/`.
2. The deployment script should include this override if it exists.
## Secrets Management
- **Do NOT commit secrets to Git.**
- Secrets should be placed in `/opt/homelab/config/<service>/.env` on the target host.
- The deployment script should ensure these are sourced by Docker Compose.

68
docs/standards.md Normal file
View file

@ -0,0 +1,68 @@
# Infrastructure Standards
This document defines the standards and conventions for the homelab GitOps-lite environment.
## Host Architecture
| Host | Role | Description |
|------|------|-------------|
| **SATURN** | Primary Node | Development, orchestration, and git source of truth (commit node). |
| **SOLARIA** | Compute Node | GPU, inference, and heavy compute workloads. |
| **PIHA** | Infra Node | Core infrastructure services, automation, and monitoring. |
| **VPS** | Edge Node | Public ingress, reverse proxy, and edge services. |
## Directory Layout
### Repository Layout
```text
/
├── docs/ # Infrastructure documentation
├── hosts/ # Host-specific configurations
│ ├── saturn/
│ ├── solaria/
│ ├── piha/
│ └── vps/
├── services/ # Reusable service definitions (Docker Compose)
├── scripts/ # Management and deployment scripts
└── README.md
```
### Runtime Layout (on Execution Nodes)
Runtime state must live outside the repository so that the repository checkout stays immutable and clean.
```text
/opt/homelab/
├── services/ # Active docker-compose files (deployed from git)
├── data/ # Persistent volume data (backed up)
├── config/ # Host-local overrides and secrets (not in git)
└── logs/ # Service logs
```
## Docker Compose Standards
1. **File Naming**: Use `docker-compose.yml`.
2. **Container Naming**: Set `container_name` to the service name, in kebab-case (e.g., `ollama-server`).
3. **Restarts**: Always use `restart: unless-stopped`.
4. **Networking**:
- Use the Tailscale internal mesh for inter-host communication.
- Expose ports only when necessary.
5. **Volumes**: Use named volumes or absolute paths to `/opt/homelab/data/service-name`.
## Environment Variables
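- `.env`: Default environment variables (checked into git only if safe; the repository `.gitignore` excludes `.env` files, so such a file has to be added explicitly, e.g. with `git add -f`).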
- `.env.local`: Host-specific overrides (not in git).
## Naming Conventions
- Hosts: All caps (SATURN, SOLARIA, PIHA, VPS).
- Services: Kebab-case (e.g., `ollama-server`).
- Containers: Match service name.
## Deployment Flow
1. Changes are committed on **SATURN** and pushed to the Forgejo instance hosted there.
2. Execution nodes (SOLARIA, PIHA, VPS) pull changes.
3. Deployment scripts trigger `docker compose up -d`.

14
hosts/piha/README.md Normal file
View file

@ -0,0 +1,14 @@
# PIHA - Infrastructure + Automation Node
## Role
- Core network services.
- Home automation (Home Assistant).
- Monitoring and logging.
## Configured Services
- Home Assistant
- Mosquitto (MQTT)
- Zigbee2MQTT
## Runtime Data
- `/opt/homelab/data/homeassistant`

13
hosts/saturn/README.md Normal file
View file

@ -0,0 +1,13 @@
# SATURN - Primary Development & Orchestration Node
## Role
- Source of truth for all infrastructure Git repositories.
- Primary workstation for development and configuration management.
- The ONLY node allowed to commit changes to the homelab repositories.
## Configured Services
(List services deployed on this host)
- Forgejo (Git source of truth)
## Runtime Data
- `/opt/homelab/data/forgejo`

12
hosts/solaria/README.md Normal file
View file

@ -0,0 +1,12 @@
# SOLARIA - Compute / GPU / Inference Node
## Role
- High-performance compute tasks.
- GPU-accelerated workloads (LLM inference, transcoding).
## Configured Services
- Ollama
- Open WebUI
## Runtime Data
- `/opt/homelab/data/ollama`

13
hosts/vps/README.md Normal file
View file

@ -0,0 +1,13 @@
# VPS - Public Edge + Ingress Node
## Role
- Public-facing reverse proxy.
- HTTPS termination (Let's Encrypt).
- Edge security and routing.
## Configured Services
- Nginx Proxy Manager (NPM)
- Authelia / Authentik (Auth)
## Runtime Data
- `/opt/homelab/data/npm`

1
hosts/vps/services.txt Normal file
View file

@ -0,0 +1 @@
npm

66
scripts/deploy/deploy-node.sh Executable file
View file

@ -0,0 +1,66 @@
#!/usr/bin/env bash
# deploy-node.sh - To be run on the execution node (SOLARIA, PIHA, VPS)
#
# Pulls the latest changes into the local checkout and runs
# `docker compose up -d --remove-orphans` for every service listed in
# hosts/<lowercase hostname>/services.txt.
#
# services.txt format: one service name per line. Blank lines and lines
# starting with '#' are skipped; surrounding whitespace (including a
# trailing carriage return) is ignored.
#
# Exit status: 1 if the repository checkout is missing, 0 if the host has no
# services.txt, otherwise the status of the first failing command.
set -euo pipefail

# Configuration
readonly REPO_PATH="${HOME}/homelab-codex-ws"
readonly RUNTIME_PATH="/opt/homelab"

# Bash maintains its own HOSTNAME variable, so the normalised host names are
# kept in dedicated variables instead of overwriting it.
HOST_UPPER=$(hostname | tr '[:lower:]' '[:upper:]')
HOST_LOWER=$(hostname | tr '[:upper:]' '[:lower:]')
readonly HOST_UPPER HOST_LOWER

echo "--- Starting Deployment on ${HOST_UPPER} ---"

# 1. Update Repository
if [[ ! -d "$REPO_PATH" ]]; then
  echo "Error: Repository not found at $REPO_PATH" >&2
  exit 1
fi

cd "$REPO_PATH"
echo "Pulling latest changes..."
git pull

# 2. Identify Services
# Convention: services assigned to this host are listed in a 'services.txt'
# inside the host's folder (folder names are lowercase).
readonly SERVICE_LIST="${REPO_PATH}/hosts/${HOST_LOWER}/services.txt"

if [[ ! -f "$SERVICE_LIST" ]]; then
  echo "No services.txt found for ${HOST_UPPER}. Skipping service deployment."
  exit 0
fi

# 3. Deploy Services
# The list is read on file descriptor 3 so that commands run inside the loop
# cannot consume the remaining entries from stdin. The `|| [[ -n ... ]]` part
# handles a final line without a trailing newline.
while IFS= read -r service <&3 || [[ -n "$service" ]]; do
  # Trim leading and trailing whitespace (this also drops a CR from CRLF files).
  service="${service#"${service%%[![:space:]]*}"}"
  service="${service%"${service##*[![:space:]]}"}"

  # Skip empty lines and comments
  if [[ -z "$service" || "$service" == \#* ]]; then
    continue
  fi

  echo "Deploying service: ${service}..."

  compose_file="${REPO_PATH}/services/${service}/docker-compose.yml"
  if [[ ! -f "$compose_file" ]]; then
    echo "Warning: Compose file not found for ${service} at ${compose_file}" >&2
    continue
  fi

  # Target directory in runtime. The compose file itself is used directly from
  # the repo; this only makes sure the per-service runtime directory exists.
  target_dir="${RUNTIME_PATH}/services/${service}"
  mkdir -p "$target_dir"

  # Build the command as an array so that paths are passed as single
  # arguments regardless of spaces or glob characters.
  compose_cmd=(docker compose -f "$compose_file")

  # Optional host-local override kept outside the repository
  override_file="${RUNTIME_PATH}/config/${service}/docker-compose.override.yml"
  if [[ -f "$override_file" ]]; then
    echo "Using override file for ${service}"
    compose_cmd+=(-f "$override_file")
  fi

  "${compose_cmd[@]}" up -d --remove-orphans
done 3< "$SERVICE_LIST"

echo "--- Deployment Complete ---"

View file

@ -0,0 +1,15 @@
#!/usr/bin/env bash
# orchestrate-deploy.sh - To be run on SATURN
#
# Triggers deployment on remote execution nodes by running deploy-node.sh on
# each of them over SSH.
#
# Every host is attempted even when an earlier one fails, so a single
# unreachable node does not block the others. Exit status is 0 when all hosts
# succeeded and 1 otherwise (the failed hosts are listed on stderr).
set -euo pipefail

readonly HOSTS=("solaria" "piha" "vps")
# Kept in SSH_USER rather than USER so the USER variable inherited from the
# environment is not overwritten for this script and its child processes.
readonly SSH_USER="oskar" # Default user

failed_hosts=()

for host in "${HOSTS[@]}"; do
  echo ">>> Triggering deployment on ${host}..."
  # Running ssh inside `if` keeps `set -e` from aborting the loop on failure.
  if ! ssh "${SSH_USER}@${host}" "bash ~/homelab-codex-ws/scripts/deploy/deploy-node.sh"; then
    echo ">>> Deployment failed on ${host}" >&2
    failed_hosts+=("$host")
  fi
done

if (( ${#failed_hosts[@]} > 0 )); then
  echo ">>> Deployment failed on: ${failed_hosts[*]}" >&2
  exit 1
fi

echo ">>> All deployments triggered."

0
services/.gitkeep Normal file
View file

View file

@ -0,0 +1,12 @@
# Compose definition for the `npm` service (Nginx Proxy Manager image).
services:
  npm:
    image: "jc21/nginx-proxy-manager:latest"
    container_name: npm
    restart: unless-stopped
    # Published ports, written as HOST:CONTAINER.
    ports:
      - "80:80"
      - "81:81"
      - "443:443"
    # Bind mounts: persistent data is kept under /opt/homelab/data/npm on the host.
    volumes:
      - /opt/homelab/data/npm/data:/data
      - /opt/homelab/data/npm/letsencrypt:/etc/letsencrypt