Add infrastructure standards and deployment conventions
This commit is contained in:
parent
2b5d59ae27
commit
d0540f7eb8
29
.gitignore
vendored
29
.gitignore
vendored
|
|
@ -1,2 +1,31 @@
|
|||
# Environment variables
|
||||
.env
|
||||
*.env
|
||||
!*.env.example
|
||||
|
||||
# IDE artifacts
|
||||
.idea/
|
||||
.vscode/
|
||||
*.swp
|
||||
*.swo
|
||||
|
||||
# Python
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
venv/
|
||||
.venv/
|
||||
|
||||
# Tools
|
||||
.aider*
|
||||
.codex
|
||||
|
||||
# OS files
|
||||
.DS_Store
|
||||
Thumbs.db
|
||||
|
||||
# Runtime data (should live outside repo, but adding just in case)
|
||||
data/
|
||||
logs/
|
||||
temp/
|
||||
tmp/
|
||||
|
|
|
|||
10
.idea/.gitignore
vendored
10
.idea/.gitignore
vendored
|
|
@ -1,10 +0,0 @@
|
|||
# Default ignored files
|
||||
/shelf/
|
||||
/workspace.xml
|
||||
# Editor-based HTTP Client requests
|
||||
/httpRequests/
|
||||
# Ignored default folder with query files
|
||||
/queries/
|
||||
# Datasource local storage ignored files
|
||||
/dataSources/
|
||||
/dataSources.local.xml
|
||||
|
|
@ -1,5 +0,0 @@
|
|||
<component name="ProjectCodeStyleConfiguration">
|
||||
<state>
|
||||
<option name="PREFERRED_PROJECT_CODE_STYLE" value="Default" />
|
||||
</state>
|
||||
</component>
|
||||
|
|
@ -1,9 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module type="JAVA_MODULE" version="4">
|
||||
<component name="NewModuleRootManager" inherit-compiler-output="true">
|
||||
<exclude-output />
|
||||
<content url="file://$MODULE_DIR$" />
|
||||
<orderEntry type="inheritedJdk" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
</module>
|
||||
|
|
@ -1,7 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="KubernetesApiProvider"><![CDATA[{}]]></component>
|
||||
<component name="ProjectRootManager" version="2" languageLevel="JDK_23" default="true" project-jdk-name="23" project-jdk-type="JavaSDK">
|
||||
<output url="file://$PROJECT_DIR$/out" />
|
||||
</component>
|
||||
</project>
|
||||
|
|
@ -1,8 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectModuleManager">
|
||||
<modules>
|
||||
<module fileurl="file://$PROJECT_DIR$/.idea/homelab-codex-ws.iml" filepath="$PROJECT_DIR$/.idea/homelab-codex-ws.iml" />
|
||||
</modules>
|
||||
</component>
|
||||
</project>
|
||||
|
|
@ -1,6 +0,0 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="VcsDirectoryMappings">
|
||||
<mapping directory="" vcs="Git" />
|
||||
</component>
|
||||
</project>
|
||||
95
README.md
95
README.md
|
|
@ -1,85 +1,38 @@
|
|||
# Homelab Current State
|
||||
# Homelab Codex
|
||||
|
||||
## Description
|
||||
GitOps-lite orchestration for a distributed homelab environment.
|
||||
|
||||
This repository documents the current known state of the homelab.
|
||||
## Architecture
|
||||
|
||||
The documentation is based only on stated facts. Missing details are recorded as unknowns and need clarification.
|
||||
The homelab consists of several nodes connected via a Tailscale internal mesh.
|
||||
|
||||
## Shared context sync lock
|
||||
| Host | Role | Description |
|
||||
|------|------|-------------|
|
||||
| **SATURN** | Primary Node | Development, orchestration, and git source of truth (commit node). |
|
||||
| **SOLARIA** | Compute Node | GPU, inference, and heavy compute workloads. |
|
||||
| **PIHA** | Infra Node | Core infrastructure services, automation, and monitoring. |
|
||||
| **VPS** | Edge Node | Public ingress, reverse proxy, and edge services. |
|
||||
|
||||
`sync-context.sh` uses a git-tracked `.context.lock` file to serialize updates to `codex_context.yaml`.
|
||||
## Repository Structure
|
||||
|
||||
If `codex_context.yaml` has changes, the script:
|
||||
- `docs/`: [Infrastructure Standards](docs/standards.md) and [Deployment Conventions](docs/deployment.md).
|
||||
- `hosts/`: Host-specific configurations and service assignments.
|
||||
- `services/`: Reusable Docker Compose service definitions.
|
||||
- `scripts/`: Deployment and management scripts.
|
||||
|
||||
1. pulls with rebase
|
||||
2. aborts if `.context.lock` already exists and prints its contents
|
||||
3. creates `.context.lock` with `hostname`, `username`, and UTC `timestamp`
|
||||
4. commits and pushes the lock with message `lock shared context`
|
||||
5. validates `codex_context.yaml`, commits it, and pushes
|
||||
6. removes `.context.lock`, commits `unlock shared context`, and pushes
|
||||
## Getting Started
|
||||
|
||||
If any step fails after lock creation, the script prints `Lock may need manual cleanup` and leaves the lock in place.
|
||||
1. **Standardization**: Follow the [Infrastructure Standards](docs/standards.md).
|
||||
2. **Deployment**: See [Deployment Conventions](docs/deployment.md) for how to roll out changes.
|
||||
3. **SATURN**: Remember that SATURN is the only node where commits should be made.
|
||||
|
||||
Manual cleanup:
|
||||
|
||||
1. inspect `.context.lock`
|
||||
2. confirm the owning host/user is no longer updating context
|
||||
3. remove the file
|
||||
4. commit `unlock shared context`
|
||||
5. push
|
||||
|
||||
## Current configuration
|
||||
|
||||
- Main server hardware: Raspberry Pi 5
|
||||
- Core stack:
|
||||
- Docker
|
||||
- Portainer
|
||||
- Nginx Proxy Manager
|
||||
- Network position: behind NAT
|
||||
- Public access path: Nginx Proxy Manager with HTTPS using Let's Encrypt
|
||||
- Private access path: Tailscale
|
||||
- Known port forwarding:
|
||||
- External ports `80-81` forward to internal ports `4480-4481`
|
||||
- External port `443` forwards to internal port `4443`
|
||||
|
||||
## Documentation index
|
||||
## Documentation Index
|
||||
|
||||
- [Infrastructure Standards](docs/standards.md)
|
||||
- [Deployment Conventions](docs/deployment.md)
|
||||
- [Hardware](docs/hardware.md)
|
||||
- [Core stack](docs/core-stack.md)
|
||||
- [Networking](docs/networking.md)
|
||||
- [Access](docs/access.md)
|
||||
- [Services](docs/services.md)
|
||||
- [Hetzner VPS](docs/hetzner-vps.md)
|
||||
- [Joplin Server](docs/joplin-server.md)
|
||||
- [Unknowns and clarification questions](docs/questions.md)
|
||||
|
||||
## Known facts
|
||||
|
||||
- The homelab has one known main server: Raspberry Pi 5.
|
||||
- Docker is part of the current stack.
|
||||
- Portainer is part of the current stack.
|
||||
- Nginx Proxy Manager is part of the current stack.
|
||||
- The homelab is behind NAT.
|
||||
- Public services are exposed through Nginx Proxy Manager with HTTPS certificates from Let's Encrypt.
|
||||
- Private access is provided through Tailscale.
|
||||
- A Hetzner VPS handoff has been received from another Codex session.
|
||||
- The Hetzner VPS hostname is `ubuntu-4gb-hel1-1`.
|
||||
- The Hetzner VPS Tailscale IP is `100.95.58.48`.
|
||||
- `100.108.208.3` is explicitly not the Hetzner VPS Tailscale IP.
|
||||
- Nginx Proxy Manager is running on the Hetzner VPS as container `npm`.
|
||||
- Joplin Server files exist on the Hetzner VPS, but Joplin is not running yet.
|
||||
|
||||
## Unknown / needs clarification
|
||||
|
||||
- Operating system and version on the Raspberry Pi 5.
|
||||
- Storage layout and attached disks.
|
||||
- Network interface configuration.
|
||||
- LAN IP addresses.
|
||||
- Public domain names for the Raspberry Pi 5 services.
|
||||
- List of all running containers.
|
||||
- Exact Nginx Proxy Manager proxy host configuration.
|
||||
- Tailscale tailnet, device name, and subnet/exit-node configuration if any.
|
||||
- Backup configuration.
|
||||
- Monitoring and alerting configuration.
|
||||
- Whether the Hetzner VPS is part of the homelab, a separate public edge, or both.
|
||||
---
|
||||
*Note: This repository documents the state of the homelab. Runtime state lives outside the repository in `/opt/homelab`.*
|
||||
|
|
|
|||
Binary file not shown.
40
docs/deployment.md
Normal file
40
docs/deployment.md
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
# Deployment Conventions
|
||||
|
||||
This document describes the GitOps-lite deployment process for the homelab.
|
||||
|
||||
## Principles
|
||||
|
||||
1. **Git as Source of Truth**: All infrastructure definitions (Docker Compose, configurations) are stored in Git.
|
||||
2. **Unidirectional Flow**: Changes flow from **SATURN** (commit node) to execution nodes.
|
||||
3. **Lightweight**: No complex orchestrators (no Kubernetes). Use `docker compose` and simple shell scripts.
|
||||
4. **Tailscale Mesh**: All hosts are connected via Tailscale, allowing secure communication without public port exposure.
|
||||
|
||||
## Deployment Process
|
||||
|
||||
### 1. Preparation (on SATURN)
|
||||
|
||||
- Modify or create service definitions in `services/`.
|
||||
- Assign services to hosts by creating/updating `hosts/<hostname>/services.txt` (or similar mapping).
|
||||
- Commit and push changes to the Forgejo instance.
|
||||
|
||||
### 2. Deployment (on Execution Node)
|
||||
|
||||
Execution nodes run a deployment script (e.g., via cron or manual trigger) that:
|
||||
|
||||
1. Performs a `git pull` from the source of truth.
|
||||
2. Identifies services assigned to this host.
|
||||
3. Symlinks or copies `services/<service>/docker-compose.yml` to `/opt/homelab/services/`.
|
||||
4. Runs `docker compose up -d --remove-orphans`.
|
||||
|
||||
## Host-Local Overrides
|
||||
|
||||
If a service requires host-specific configuration (e.g., unique device paths for GPUs on SOLARIA):
|
||||
|
||||
1. Create a `docker-compose.override.yml` in `/opt/homelab/config/<service>/`.
|
||||
2. The deployment script should include this override if it exists.
|
||||
|
||||
## Secrets Management
|
||||
|
||||
- **Do NOT commit secrets to Git.**
|
||||
- Secrets should be placed in `/opt/homelab/config/<service>/.env` on the target host.
|
||||
- The deployment script should ensure these are sourced by Docker Compose.
|
||||
68
docs/standards.md
Normal file
68
docs/standards.md
Normal file
|
|
@ -0,0 +1,68 @@
|
|||
# Infrastructure Standards
|
||||
|
||||
This document defines the standards and conventions for the homelab GitOps-lite environment.
|
||||
|
||||
## Host Architecture
|
||||
|
||||
| Host | Role | Description |
|
||||
|------|------|-------------|
|
||||
| **SATURN** | Primary Node | Development, orchestration, and git source of truth (commit node). |
|
||||
| **SOLARIA** | Compute Node | GPU, inference, and heavy compute workloads. |
|
||||
| **PIHA** | Infra Node | Core infrastructure services, automation, and monitoring. |
|
||||
| **VPS** | Edge Node | Public ingress, reverse proxy, and edge services. |
|
||||
|
||||
## Directory Layout
|
||||
|
||||
### Repository Layout
|
||||
|
||||
```text
|
||||
/
|
||||
├── docs/ # Infrastructure documentation
|
||||
├── hosts/ # Host-specific configurations
|
||||
│ ├── saturn/
|
||||
│ ├── solaria/
|
||||
│ ├── piha/
|
||||
│ └── vps/
|
||||
├── services/ # Reusable service definitions (Docker Compose)
|
||||
├── scripts/ # Management and deployment scripts
|
||||
└── README.md
|
||||
```
|
||||
|
||||
### Runtime Layout (on Execution Nodes)
|
||||
|
||||
Runtime state must live outside the repository to keep it immutable and clean.
|
||||
|
||||
```text
|
||||
/opt/homelab/
|
||||
├── services/ # Active docker-compose files (deployed from git)
|
||||
├── data/ # Persistent volume data (backed up)
|
||||
├── config/ # Host-local overrides and secrets (not in git)
|
||||
└── logs/ # Service logs
|
||||
```
|
||||
|
||||
## Docker Compose Standards
|
||||
|
||||
1. **File Naming**: Use `docker-compose.yml`.
|
||||
2. **Container Naming**: Match the service name, in kebab-case (e.g., `ollama-server`).
|
||||
3. **Restarts**: Always use `restart: unless-stopped`.
|
||||
4. **Networking**:
|
||||
- Use `tailscale` internal mesh for inter-host communication.
|
||||
- Expose ports only when necessary.
|
||||
5. **Volumes**: Use named volumes or absolute paths to `/opt/homelab/data/service-name`.
|
||||
|
||||
## Environment Variables
|
||||
|
||||
- `.env`: Default environment variables (checked into git if safe).
|
||||
- `.env.local`: Host-specific overrides (not in git).
|
||||
|
||||
## Naming Conventions
|
||||
|
||||
- Hosts: All caps (SATURN, SOLARIA, PIHA, VPS).
|
||||
- Services: Kebab-case (e.g., `ollama-server`).
|
||||
- Containers: Match service name.
|
||||
|
||||
## Deployment Flow
|
||||
|
||||
1. Changes are committed on **SATURN** and pushed to the Forgejo instance it hosts.
|
||||
2. Execution nodes (SOLARIA, PIHA, VPS) pull changes.
|
||||
3. Deployment scripts trigger `docker compose up -d --remove-orphans`.
|
||||
14
hosts/piha/README.md
Normal file
14
hosts/piha/README.md
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
# PIHA - Infrastructure + Automation Node
|
||||
|
||||
## Role
|
||||
- Core network services.
|
||||
- Home automation (Home Assistant).
|
||||
- Monitoring and logging.
|
||||
|
||||
## Configured Services
|
||||
- Home Assistant
|
||||
- Mosquitto (MQTT)
|
||||
- Zigbee2MQTT
|
||||
|
||||
## Runtime Data
|
||||
- `/opt/homelab/data/homeassistant`
|
||||
13
hosts/saturn/README.md
Normal file
13
hosts/saturn/README.md
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
# SATURN - Primary Development & Orchestration Node
|
||||
|
||||
## Role
|
||||
- Source of truth for all infrastructure Git repositories.
|
||||
- Primary workstation for development and configuration management.
|
||||
- The ONLY node allowed to commit changes to the homelab repositories.
|
||||
|
||||
## Configured Services
|
||||
(List services deployed on this host)
|
||||
- Forgejo (Git source of truth)
|
||||
|
||||
## Runtime Data
|
||||
- `/opt/homelab/data/forgejo`
|
||||
12
hosts/solaria/README.md
Normal file
12
hosts/solaria/README.md
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
# SOLARIA - Compute / GPU / Inference Node
|
||||
|
||||
## Role
|
||||
- High-performance compute tasks.
|
||||
- GPU-accelerated workloads (LLM inference, transcoding).
|
||||
|
||||
## Configured Services
|
||||
- Ollama
|
||||
- Open WebUI
|
||||
|
||||
## Runtime Data
|
||||
- `/opt/homelab/data/ollama`
|
||||
13
hosts/vps/README.md
Normal file
13
hosts/vps/README.md
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
# VPS - Public Edge + Ingress Node
|
||||
|
||||
## Role
|
||||
- Public-facing reverse proxy.
|
||||
- HTTPS termination (Let's Encrypt).
|
||||
- Edge security and routing.
|
||||
|
||||
## Configured Services
|
||||
- Nginx Proxy Manager (NPM)
|
||||
- Authelia / Authentik (Auth)
|
||||
|
||||
## Runtime Data
|
||||
- `/opt/homelab/data/npm`
|
||||
1
hosts/vps/services.txt
Normal file
1
hosts/vps/services.txt
Normal file
|
|
@ -0,0 +1 @@
|
|||
npm
|
||||
66
scripts/deploy/deploy-node.sh
Executable file
66
scripts/deploy/deploy-node.sh
Executable file
|
|
@ -0,0 +1,66 @@
|
|||
#!/usr/bin/env bash
# deploy-node.sh - To be run on an execution node (SOLARIA, PIHA, VPS).
#
# Pulls the latest repository state and brings up every service listed in
# hosts/<hostname>/services.txt via `docker compose up -d --remove-orphans`.
#
# Environment (all optional):
#   REPO_PATH     Repository checkout   (default: ${HOME}/homelab-codex-ws)
#   RUNTIME_PATH  Runtime root directory (default: /opt/homelab)
#
# Exit status:
#   0  all listed services deployed, or the host has no services.txt
#   1  repository missing, or at least one service failed to deploy
#   *  status of `cd` / `git pull` if updating the repository fails

set -euo pipefail

# Configuration
REPO_PATH="${REPO_PATH:-${HOME}/homelab-codex-ws}"
RUNTIME_PATH="${RUNTIME_PATH:-/opt/homelab}"

# Resolve the host name once. Dedicated variable names are used so that
# Bash's own HOSTNAME variable is not overwritten.
NODE_NAME="$(hostname)"
NODE_UPPER="$(printf '%s' "$NODE_NAME" | tr '[:lower:]' '[:upper:]')"
NODE_LOWER="$(printf '%s' "$NODE_NAME" | tr '[:upper:]' '[:lower:]')"

#######################################
# Deploy a single service with docker compose.
# Globals:   REPO_PATH, RUNTIME_PATH (read)
# Arguments: $1 - service name (directory name under services/)
# Outputs:   progress on stdout, warnings on stderr
# Returns:   0 on success or when the service has no compose file
#            (warning only); non-zero if mkdir or docker compose fails
#######################################
deploy_service() {
  local service=$1
  local compose_file="${REPO_PATH}/services/${service}/docker-compose.yml"
  local override_file="${RUNTIME_PATH}/config/${service}/docker-compose.override.yml"
  local target_dir="${RUNTIME_PATH}/services/${service}"
  local -a compose_cmd

  echo "Deploying service: ${service}..."

  if [[ ! -f "$compose_file" ]]; then
    echo "Warning: Compose file not found for ${service} at ${compose_file}" >&2
    return 0
  fi

  # Target directory in runtime. Explicit status check: `set -e` is
  # suspended while this function runs as an `if` condition.
  mkdir -p -- "$target_dir" || return 1

  # The compose file is used from the repo directly. The command is built
  # as an array so paths containing whitespace stay single arguments.
  compose_cmd=(docker compose -f "$compose_file")
  if [[ -f "$override_file" ]]; then
    echo "Using override file for ${service}"
    compose_cmd+=(-f "$override_file")
  fi

  # stdin is detached so the command can never consume the service list.
  "${compose_cmd[@]}" up -d --remove-orphans </dev/null
}

echo "--- Starting Deployment on ${NODE_UPPER} ---"

# 1. Update Repository
if [[ ! -d "$REPO_PATH" ]]; then
  echo "Error: Repository not found at $REPO_PATH" >&2
  exit 1
fi

cd "$REPO_PATH"
echo "Pulling latest changes..."
# Execution nodes never commit, so anything other than a fast-forward means
# the checkout has diverged and should be inspected rather than merged.
git pull --ff-only

# 2. Identify Services
# By convention, services assigned to this host are listed one per line in
# hosts/<lowercase hostname>/services.txt.
SERVICE_LIST="${REPO_PATH}/hosts/${NODE_LOWER}/services.txt"

if [[ ! -f "$SERVICE_LIST" ]]; then
  echo "No services.txt found for ${NODE_UPPER}. Skipping service deployment."
  exit 0
fi

# 3. Deploy Services
# The list is read on fd 3; `|| [[ -n ... ]]` handles a final line without a
# trailing newline. A failing service is recorded and the loop continues so
# one broken service does not block the others.
failed=()
while IFS= read -r -u 3 service || [[ -n "$service" ]]; do
  # Trim surrounding whitespace (including a CR from CRLF line endings).
  service="${service#"${service%%[![:space:]]*}"}"
  service="${service%"${service##*[![:space:]]}"}"

  case "$service" in
    '' | '#'*) continue ;; # Skip empty lines and comments
  esac

  if ! deploy_service "$service"; then
    echo "Error: deployment of ${service} failed" >&2
    failed+=("$service")
  fi
done 3< "$SERVICE_LIST"

if ((${#failed[@]} > 0)); then
  printf 'Error: deployment failed for: %s\n' "${failed[*]}" >&2
  exit 1
fi

echo "--- Deployment Complete ---"
|
||||
15
scripts/deploy/orchestrate-deploy.sh
Executable file
15
scripts/deploy/orchestrate-deploy.sh
Executable file
|
|
@ -0,0 +1,15 @@
|
|||
#!/usr/bin/env bash
# orchestrate-deploy.sh - To be run on SATURN
# Triggers deployment on remote execution nodes by running deploy-node.sh
# on each of them over SSH.
#
# Environment (optional):
#   DEPLOY_USER  Remote login user (default: oskar)
#
# Exit status: 0 if the remote script succeeded on every host, 1 otherwise.

set -euo pipefail

HOSTS=("solaria" "piha" "vps")
# Deliberately not named USER: assigning USER would overwrite the login-name
# variable that child processes inherit from the environment.
DEPLOY_USER="${DEPLOY_USER:-oskar}" # Default user

# Every host is attempted even if an earlier one fails; failures are
# collected and reported at the end.
failed_hosts=()
for HOST in "${HOSTS[@]}"; do
  echo ">>> Triggering deployment on ${HOST}..."
  if ! ssh "${DEPLOY_USER}@${HOST}" "bash ~/homelab-codex-ws/scripts/deploy/deploy-node.sh"; then
    echo ">>> Deployment failed on ${HOST}" >&2
    failed_hosts+=("$HOST")
  fi
done

if ((${#failed_hosts[@]} > 0)); then
  printf '>>> Deployment failed on: %s\n' "${failed_hosts[*]}" >&2
  exit 1
fi

echo ">>> All deployments triggered."
|
||||
0
services/.gitkeep
Normal file
0
services/.gitkeep
Normal file
12
services/npm/docker-compose.yml
Normal file
12
services/npm/docker-compose.yml
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
# Compose definition for the `npm` service (image jc21/nginx-proxy-manager),
# the service listed in hosts/vps/services.txt.
services:
|
||||
npm:
|
||||
# NOTE(review): `latest` is a floating tag; pin a version if reproducible deployments are wanted.
image: 'jc21/nginx-proxy-manager:latest'
|
||||
container_name: npm
|
||||
restart: unless-stopped
|
||||
# Host:container port mappings published on the host.
ports:
|
||||
- '80:80'
|
||||
# NOTE(review): 81 is presumably the NPM admin UI - confirm it should be published on a public edge host.
- '81:81'
|
||||
- '443:443'
|
||||
# Bind mounts under /opt/homelab/data/npm keep runtime state outside the repository.
volumes:
|
||||
- /opt/homelab/data/npm/data:/data
|
||||
- /opt/homelab/data/npm/letsencrypt:/etc/letsencrypt
|
||||
Loading…
Reference in a new issue