From b47992f7701529f37dcec8187434e5c73fd41bbf Mon Sep 17 00:00:00 2001 From: Thomas Hallock Date: Wed, 14 Jan 2026 17:04:01 -0600 Subject: [PATCH] feat(deploy): add blue-green deployment with health endpoint - Add /api/health endpoint that checks database connectivity - Set up blue-green deployment with two containers (abaci-blue, abaci-green) - Add docker-compose.yaml with YAML anchors for DRY config - Add generate-compose.sh to create blue/green compose files from main - Update deploy.sh with NAS-specific fixes (scp -O, PATH for docker) - Fix deploy.sh to not overwrite production .env by default The blue-green setup allows zero-downtime deployments via compose-updater, which watches separate compose files and restarts containers independently. Co-Authored-By: Claude Opus 4.5 --- DEPLOYMENT.md | 312 ++++++++++++----------- apps/web/.claude/DEPLOYMENT.md | 261 +++++++------------ apps/web/src/app/api/health/route.ts | 69 +++++ nas-deployment/deploy.sh | 186 +++++++++++--- nas-deployment/docker-compose.blue.yaml | 45 ++++ nas-deployment/docker-compose.green.yaml | 45 ++++ nas-deployment/docker-compose.yaml | 124 ++++----- nas-deployment/generate-compose.sh | 48 ++++ 8 files changed, 678 insertions(+), 412 deletions(-) create mode 100644 apps/web/src/app/api/health/route.ts create mode 100644 nas-deployment/docker-compose.blue.yaml create mode 100644 nas-deployment/docker-compose.green.yaml create mode 100755 nas-deployment/generate-compose.sh diff --git a/DEPLOYMENT.md b/DEPLOYMENT.md index 03795c15..195fa23c 100644 --- a/DEPLOYMENT.md +++ b/DEPLOYMENT.md @@ -2,166 +2,218 @@ ## Overview -The Soroban Abacus Flashcards application is deployed to production at `https://abaci.one` using a fully automated CI/CD pipeline. The system follows the established NAS deployment pattern with Docker containerization, GitHub Actions for CI/CD, Traefik reverse proxy, Watchtower for auto-updates, and Porkbun DDNS integration. 
+The Soroban Abacus Flashcards application is deployed to production at `https://abaci.one` using a blue-green deployment strategy with zero-downtime updates. ## Architecture ``` -User Request → Cloudflare → abaci.one (DDNS) → Synology NAS → Traefik → Docker Container +User Request → Cloudflare → abaci.one (DDNS) → Synology NAS → Traefik → Docker (Blue + Green) ``` ### Components 1. **Source**: Monorepo with pnpm workspaces and Turborepo -2. **CI/CD**: GitHub Actions with automated Docker builds +2. **CI/CD**: GitHub Actions builds and pushes Docker images 3. **Registry**: GitHub Container Registry (ghcr.io) -4. **Deployment**: Synology NAS with Docker Compose -5. **Reverse Proxy**: Traefik with Let's Encrypt SSL -6. **Auto-Updates**: Watchtower (5-minute polling) +4. **Auto-Deploy**: compose-updater detects new images +5. **Load Balancing**: Traefik routes to healthy containers +6. **Reverse Proxy**: Traefik with Let's Encrypt SSL 7. **DNS**: Porkbun DDNS for dynamic IP updates +## Blue-Green Deployment + +Two containers (`abaci-blue` and `abaci-green`) run simultaneously: + +- **Shared resources**: Both containers mount the same data volumes +- **Health checks**: Traefik only routes to healthy containers +- **Zero downtime**: When one container restarts, the other serves traffic +- **Automatic updates**: compose-updater pulls new images and restarts containers + +### How It Works + +``` +1. Push to main branch + ↓ +2. GitHub Actions builds and pushes Docker image to ghcr.io + ↓ +3. compose-updater detects new image (checks every 5 minutes) + ↓ +4. compose-updater restarts containers one at a time + ↓ +5. 
Traefik health checks ensure traffic only goes to ready containers +``` + +### Health Check Endpoint + +The `/api/health` endpoint verifies container readiness: + +```bash +curl https://abaci.one/api/health +``` + +Response: +```json +{ + "status": "healthy", + "timestamp": "2025-01-14T12:00:00.000Z", + "checks": { + "database": { + "status": "ok", + "latencyMs": 2 + } + } +} +``` + +- Returns `200 OK` when healthy +- Returns `503 Service Unavailable` when unhealthy +- Traefik uses this to determine if a container should receive traffic + ## Deployment Process -### 1. Code Push Triggers Build +### Automatic Deployment When code is pushed to the `main` branch: -1. GitHub Actions workflow (`.github/workflows/deploy.yml`) triggers -2. Multi-stage Docker build runs: - - Install dependencies with pnpm - - Generate Panda CSS styled-system - - Build Next.js app with Turborepo - - Create optimized production image -3. Image pushed to `ghcr.io/antialias/soroban-abacus-flashcards` +1. **Build Phase** (GitHub Actions): + - Multi-stage Docker build + - Image pushed to `ghcr.io/antialias/soroban-abacus-flashcards` -### 2. Automatic Deployment +2. **Deploy Phase** (compose-updater on NAS): + - Detects new image within 5 minutes + - Pulls new image + - Restarts containers (one at a time) + - Traefik routes traffic to healthy containers -1. **Global Watchtower** (located at `/volume1/homes/antialias/projects/global-services/`) polls GitHub Container Registry every 5 minutes -2. Detects new image version and pulls it -3. Gracefully stops old container and starts new one -4. Traefik automatically routes traffic to new container +### Manual Deployment -**Note**: We use a centralized global Watchtower service that monitors ALL containers across the NAS, rather than project-specific Watchtower instances. +```bash +# From local machine (with SSH access to NAS) +./nas-deployment/deploy.sh +``` -### 3. DNS and SSL - -1. Porkbun DDNS keeps `abaci.one` pointing to current NAS IP -2. 
Traefik handles SSL certificate provisioning via Let's Encrypt -3. Automatic HTTPS redirect and certificate renewal +This script handles migration from the old single-container setup to blue-green. ## File Structure ``` / ├── Dockerfile # Multi-stage build configuration -├── .github/workflows/deploy.yml # CI/CD pipeline -├── apps/web/next.config.js # Next.js standalone output config +├── .github/workflows/deploy.yml # CI/CD pipeline (build + push) +├── apps/web/ +│ └── src/app/api/health/route.ts # Health check endpoint ├── nas-deployment/ -│ ├── docker-compose.yaml # Production container orchestration +│ ├── docker-compose.yaml # Blue-green container config +│ ├── deploy.sh # Manual deployment/migration script │ └── .env # Environment variables (not committed) └── DEPLOYMENT.md # This documentation ``` -## Key Configuration Files +## Docker Compose Configuration -### Dockerfile +Both containers share the same volumes and Traefik service: -- Multi-stage build optimized for monorepo -- pnpm workspace dependency management -- Panda CSS generation step -- Next.js standalone output for optimal Docker deployment -- Proper static file serving configuration +```yaml +services: + blue: + image: ghcr.io/antialias/soroban-abacus-flashcards:latest + container_name: abaci-blue + volumes: + - ./data:/app/apps/web/data # Shared database + - ./uploads:/app/uploads # Shared uploads + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:3000/api/health"] + labels: + - "traefik.http.services.abaci.loadbalancer.server.port=3000" + - "traefik.http.services.abaci.loadbalancer.healthcheck.path=/api/health" -### GitHub Actions Workflow - -- Triggers on push to main branch -- Builds and pushes Docker images to ghcr.io -- Uses GitHub Container Registry for hosting -- Simplified build process (no type checking in CI) - -### Docker Compose - -- Single service deployment -- Traefik labels for reverse proxy routing -- Watchtower compatibility for auto-updates -- Environment variable 
configuration - -### Next.js Configuration - -- Standalone output mode for Docker optimization -- Build optimization settings -- Static file serving configuration - -## Environment Variables - -Required in `nas-deployment/.env`: - -```bash -# GitHub Container Registry -GITHUB_TOKEN= -GITHUB_USERNAME= - -# Application -NODE_ENV=production + green: + # Same configuration as blue, shares volumes ``` -## NAS Deployment Directory - -``` -/volume1/homes/antialias/projects/abaci.one/ -├── docker-compose.yaml -├── .env -└── logs/ -``` +Traefik automatically load balances between both containers, routing only to healthy ones. ## Monitoring and Maintenance -### Checking Deployment Status +### Check Deployment Status ```bash -# On NAS -docker-compose ps -docker-compose logs -f app +# Check running containers +ssh nas.home.network "docker ps | grep abaci" -# GitHub Actions status -gh run list --repo antialias/soroban-abacus-flashcards +# Check health of both containers +curl https://abaci.one/api/health + +# Check compose-updater logs +ssh nas.home.network "docker logs --tail 50 compose-updater" ``` -### Manual Updates +### View Logs ```bash -# Force update (if needed) -docker-compose pull -docker-compose up -d +# Blue container logs +ssh nas.home.network "docker logs -f abaci-blue" + +# Green container logs +ssh nas.home.network "docker logs -f abaci-green" ``` -### DNS Status +### Force Immediate Update ```bash -# Check DNS resolution -nslookup abaci.one +# Restart compose-updater to trigger immediate check +ssh nas.home.network "cd /volume1/homes/antialias/projects/abaci.one && docker-compose -f docker-compose.updater.yaml restart" + +# Or manually pull and restart +ssh nas.home.network "cd /volume1/homes/antialias/projects/abaci.one && docker-compose pull && docker-compose up -d" ``` ## Troubleshooting -### Common Issues +### Health Check Failing -1. **CSS not loading**: Check static file paths in Dockerfile -2. 
**DNS not updating**: Verify DDNS configuration in existing updater -3. **Container not starting**: Check environment variables and logs -4. **SSL certificate issues**: Traefik will auto-renew, check Traefik logs +1. Check container logs: + ```bash + ssh nas.home.network "docker logs abaci-blue" + ``` -### Build Failures +2. Test health endpoint manually: + ```bash + ssh nas.home.network "docker exec abaci-blue curl -sf http://localhost:3000/api/health" + ``` -1. **TypeScript errors**: Review apps/web/src files for type issues -2. **Dependency issues**: Verify workspace dependencies are correctly referenced -3. **Docker build timeout**: Check .dockerignore and build optimization +3. Check database connectivity -### Production Issues +### Container Not Updating -1. **Site not accessible**: Check Traefik configuration and DNS -2. **Auto-updates not working**: Verify Watchtower is running -3. **Performance issues**: Monitor container resources +1. Verify GitHub Actions completed successfully +2. Check compose-updater is running: + ```bash + ssh nas.home.network "docker ps | grep compose-updater" + ``` +3. Check compose-updater logs for errors + +### Both Containers Unhealthy + +```bash +# Force restart both +ssh nas.home.network "cd /volume1/homes/antialias/projects/abaci.one && docker-compose restart" +``` + +## Migration from Single Container + +If upgrading from the old single-container setup: + +```bash +./nas-deployment/deploy.sh +``` + +This script will: +1. Stop the old `soroban-abacus-flashcards` container +2. Stop compose-updater temporarily +3. Deploy the new docker-compose.yaml +4. Start both blue and green containers +5. 
Restart compose-updater ## Security @@ -169,61 +221,15 @@ nslookup abaci.one - Minimal Alpine Linux base image - GitHub Container Registry with token authentication - Traefik handles SSL termination -- No sensitive data in committed files +- Health check doesn't expose sensitive data -## Dependencies +## Performance -### External Services - -- GitHub (source code and CI/CD) -- GitHub Container Registry (image hosting) -- Porkbun (DNS management) -- Let's Encrypt (SSL certificates) - -### Infrastructure - -- Synology NAS (hosting) -- Docker and Docker Compose -- Traefik reverse proxy -- **Global Watchtower** (centralized auto-updater for all containers) - -## Backup and Recovery - -### Container Recovery - -```bash -# Stop and remove container -docker-compose down - -# Pull latest image -docker-compose pull - -# Start fresh container -docker-compose up -d -``` - -### Configuration Backup - -- `docker-compose.yaml` and `.env` files are backed up via NAS snapshots -- Source code is version controlled in GitHub -- Container images stored in GitHub Container Registry - -## Performance Optimization - -- Next.js standalone output reduces image size -- Multi-stage Docker build minimizes production image -- Panda CSS pre-compilation -- Traefik connection pooling and compression -- Docker layer caching in CI/CD - -## Future Improvements - -- Health checks in Docker configuration -- Container resource limits -- Monitoring and alerting integration -- Staging environment setup -- Database integration (if needed) +- Zero-downtime deployments via load balancing +- Health checks prevent routing to unhealthy containers +- Both containers share data - no sync needed +- SQLite WAL mode handles concurrent access --- -_This deployment system provides a production-ready, automated, and maintainable infrastructure for the Soroban Abacus Flashcards application._ +_Last updated: January 2026_ diff --git a/apps/web/.claude/DEPLOYMENT.md b/apps/web/.claude/DEPLOYMENT.md index 
bdc7e5e2..1f37f44d 100644 --- a/apps/web/.claude/DEPLOYMENT.md +++ b/apps/web/.claude/DEPLOYMENT.md @@ -12,23 +12,14 @@ This document describes the production deployment infrastructure and procedures - Server is not accessible from external networks - **Project Directory**: `/volume1/homes/antialias/projects/abaci.one` -### Docker Configuration +### Deployment Strategy: Blue-Green with Load Balancing -This deployment uses **two separate Docker Compose projects**: +Two containers (`abaci-blue` and `abaci-green`) run simultaneously: -1. **soroban-app** (`docker-compose.yaml`) - - Main web application - - Container: `soroban-abacus-flashcards` - - Image: `ghcr.io/antialias/soroban-abacus-flashcards:main` - - Port: 3000 (internal to Docker network) - -2. **soroban-updater** (`docker-compose.updater.yaml`) - - Automatic update service - - Container: `compose-updater` - - Image: `virtualzone/compose-updater:latest` - - Checks for new images every 5 minutes - -**Why separate projects?** If compose-updater was in the same project as the app, running `docker-compose down` would kill itself mid-update. Separate projects prevent this. 
+- **Shared resources**: Both containers mount the same data volumes +- **Health checks**: Traefik only routes to healthy containers +- **Zero downtime**: When one container restarts, the other serves traffic +- **Automatic updates**: compose-updater pulls new images and restarts containers ### Auto-Deployment with compose-updater @@ -36,20 +27,32 @@ This deployment uses **two separate Docker Compose projects**: - **Update frequency**: Every **5 minutes** (configurable via `INTERVAL=5`) - Works WITH docker-compose files (respects configuration, volumes, environment variables) - Automatically cleans up old images (`CLEANUP=1`) -- No manual intervention required for deployments after pushing to main -**Key advantages over Watchtower:** +## Health Check Endpoint -- Respects docker-compose.yaml configuration -- Re-reads `.env` file on every update -- Can manage multiple docker-compose projects -- Container labels control which containers to watch: - ```yaml - labels: - - "docker-compose-watcher.watch=1" - - "docker-compose-watcher.dir=/volume1/homes/antialias/projects/abaci.one" - - "com.centurylinklabs.watchtower.enable=false" # Disables Watchtower for this container - ``` +The `/api/health` endpoint verifies container readiness: + +```bash +curl https://abaci.one/api/health +``` + +Response: +```json +{ + "status": "healthy", + "timestamp": "2025-01-14T12:00:00.000Z", + "checks": { + "database": { + "status": "ok", + "latencyMs": 2 + } + } +} +``` + +- Returns `200 OK` when healthy +- Returns `503 Service Unavailable` when unhealthy +- Traefik uses this to determine if a container should receive traffic ## Database Management @@ -67,7 +70,7 @@ This deployment uses **two separate Docker Compose projects**: 2. Logs: `🔄 Running database migrations...` 3. Drizzle migrator runs all pending migrations 4. Logs: `✅ Migrations complete` (on success) - 5. Logs: `❌ Migration failed: [error]` (on failure, process exits) + 5. 
Health check passes only after migrations complete ### Nuke and Rebuild Database @@ -80,17 +83,17 @@ ssh nas.home.network # Navigate to project directory cd /volume1/homes/antialias/projects/abaci.one -# Stop the container -/usr/local/bin/docker-compose down +# Stop both containers +docker stop abaci-blue abaci-green # Remove database files rm -f data/sqlite.db data/sqlite.db-shm data/sqlite.db-wal -# Restart container (migrations will rebuild DB) -/usr/local/bin/docker-compose up -d +# Restart containers (migrations will rebuild DB) +docker start abaci-blue abaci-green # Check logs to verify migration success -/usr/local/bin/docker logs soroban-abacus-flashcards | grep -E '(Migration|Starting)' +docker logs abaci-blue | grep -E '(Migration|Starting)' ``` ## CI/CD Pipeline @@ -99,30 +102,21 @@ rm -f data/sqlite.db data/sqlite.db-shm data/sqlite.db-wal When code is pushed to `main` branch: -1. **Workflows triggered**: - - `Build and Deploy` - Builds Docker image and pushes to GHCR - - `Release` - Manages semantic versioning and releases - - `Verify Examples` - Runs example tests - - `Deploy Storybooks to GitHub Pages` - Publishes Storybook +1. **Build and Push job**: + - Builds Docker image + - Tags as `main` and `latest` + - Pushes to GitHub Container Registry (ghcr.io) -2. **Image build**: - - Built image is tagged as `main` (also `latest` for compatibility) - - Pushed to GitHub Container Registry (ghcr.io) - - Typically completes within 1-2 minutes - -3. **Deployment**: - - compose-updater detects new image (within 5 minutes) +2. 
**Deploy** (compose-updater on NAS): + - Detects new image within 5 minutes - Pulls new image - - Runs `docker-compose down && docker-compose up -d` - - Cleans up old images - - Total deployment time: ~5-7 minutes from push to production (15-30 seconds downtime during restart) + - Restarts containers + - Traefik routes traffic to healthy containers ## Manual Deployment Procedures ### Force Pull Latest Image -If you need to immediately deploy without waiting for compose-updater's next check cycle: - ```bash # Option 1: Restart compose-updater (triggers immediate check) ssh nas.home.network "cd /volume1/homes/antialias/projects/abaci.one && docker-compose -f docker-compose.updater.yaml restart" @@ -134,64 +128,83 @@ ssh nas.home.network "cd /volume1/homes/antialias/projects/abaci.one && docker-c ### Check Container Status ```bash -# Check both app and compose-updater -ssh nas.home.network "docker ps | grep -E '(soroban|compose)'" +# Check both containers +ssh nas.home.network "docker ps | grep abaci" -# Check just the app -ssh nas.home.network "docker ps | grep soroban-abacus-flashcards" +# Check health +curl https://abaci.one/api/health ``` ### View Logs ```bash -# Application logs - recent -ssh nas.home.network "docker logs --tail 100 soroban-abacus-flashcards" +# Blue container logs +ssh nas.home.network "docker logs --tail 100 abaci-blue" -# Application logs - follow in real-time -ssh nas.home.network "docker logs -f soroban-abacus-flashcards" +# Green container logs +ssh nas.home.network "docker logs --tail 100 abaci-green" -# compose-updater logs - see update activity +# Follow in real-time +ssh nas.home.network "docker logs -f abaci-blue" + +# compose-updater logs ssh nas.home.network "docker logs --tail 50 compose-updater" - -# compose-updater logs - follow to watch for updates -ssh nas.home.network "docker logs -f compose-updater" - -# Search for specific patterns -ssh nas.home.network "docker logs soroban-abacus-flashcards" | grep -i "error" ``` -### 
Restart Container +### Restart Containers ```bash -# Restart just the app (quick, minimal downtime) +# Restart both (they restart one at a time, maintaining availability) ssh nas.home.network "cd /volume1/homes/antialias/projects/abaci.one && docker-compose restart" - -# Full restart (down then up, recreates container) -ssh nas.home.network "cd /volume1/homes/antialias/projects/abaci.one && docker-compose down && docker-compose up -d" - -# Restart compose-updater (triggers immediate update check) -ssh nas.home.network "cd /volume1/homes/antialias/projects/abaci.one && docker-compose -f docker-compose.updater.yaml restart" ``` ## Checking Deployed Version -Always verify what's actually running in production: - ```bash -# Get commit SHA of running container -ssh nas.home.network 'docker inspect soroban-abacus-flashcards --format="{{index .Config.Labels \"org.opencontainers.image.revision\"}}"' +# Get commit SHA of running containers +ssh nas.home.network 'docker inspect abaci-blue --format="{{index .Config.Labels \"org.opencontainers.image.revision\"}}"' +ssh nas.home.network 'docker inspect abaci-green --format="{{index .Config.Labels \"org.opencontainers.image.revision\"}}"' # Compare with current HEAD git rev-parse HEAD - -# Or check via the deployment info modal in the app UI ``` ## Troubleshooting -### Common Issues +### Health Check Failing -#### 1. Migration Failures +1. Check container logs: + ```bash + ssh nas.home.network "docker logs abaci-blue" + ``` + +2. Test health endpoint manually: + ```bash + ssh nas.home.network "docker exec abaci-blue curl -sf http://localhost:3000/api/health" + ``` + +3. Check database connectivity + +### Container Not Updating + +1. Verify GitHub Actions completed successfully +2. Check compose-updater is running: + ```bash + ssh nas.home.network "docker ps | grep compose-updater" + ``` +3. 
Check compose-updater logs for errors: + ```bash + ssh nas.home.network "docker logs --tail 50 compose-updater" + ``` + +### Both Containers Unhealthy + +```bash +# Force restart both +ssh nas.home.network "cd /volume1/homes/antialias/projects/abaci.one && docker-compose restart" +``` + +### Migration Failures **Symptom**: Container keeps restarting, logs show migration errors @@ -201,90 +214,10 @@ git rev-parse HEAD 2. Verify `drizzle/meta/_journal.json` is up to date 3. If migrations are corrupted, may need to nuke database (see above) -#### 2. Container Not Updating - -**Symptom**: Changes pushed but production still shows old code - -**Possible causes**: - -- GitHub Actions build failed - check workflow status with `gh run list` -- compose-updater not running - check with `docker ps | grep compose-updater` -- compose-updater labels incorrect - check container labels -- Image not pulled - manually pull with `docker-compose pull` -- **compose-updater detection issue** - May not detect updates reliably (investigation ongoing - 2025-11-13) - -**Debugging**: - -```bash -# Check compose-updater is running -ssh nas.home.network "docker ps | grep compose-updater" - -# Check compose-updater logs for errors and pull activity -ssh nas.home.network "docker logs --tail 50 compose-updater" -# Look for: "Processing service" followed by pull activity -# If it says "No need to restart" WITHOUT pulling, detection may be broken - -# Check container labels are correct -ssh nas.home.network "docker inspect soroban-abacus-flashcards" | grep -A3 "docker-compose-watcher" -# Should show: -# "docker-compose-watcher.watch": "1" -# "docker-compose-watcher.dir": "/volume1/homes/antialias/projects/abaci.one" -``` - -**Known Issue (2025-11-13)**: - -compose-updater sometimes fails to detect updates even when new images are available. Logs show: - -``` -Processing service soroban-abacus-flashcards (requires build: false, watched: true)... 
-No need to restart services in /volume1/homes/antialias/projects/abaci.one/docker-compose.yaml -``` - -Without any `docker pull` activity shown, even with `LOG_LEVEL=debug`. This suggests it's determining "no update needed" without actually checking the remote registry. Root cause under investigation. - -**Solution**: - -```bash -# Option 1: Manual pull and restart (most reliable) -ssh nas.home.network "cd /volume1/homes/antialias/projects/abaci.one && docker-compose pull && docker-compose up -d" - -# Option 2: Restart compose-updater to force immediate check (may not always work) -ssh nas.home.network "cd /volume1/homes/antialias/projects/abaci.one && docker-compose -f docker-compose.updater.yaml restart" -``` - -#### 3. Missing Database Columns - -**Symptom**: Errors like `SqliteError: no such column: "column_name"` - -**Cause**: Migration not registered or not run - -**Solution**: - -1. Verify migration exists in `drizzle/` directory -2. Check migration is registered in `drizzle/meta/_journal.json` -3. If migration is new, restart container to run migrations -4. If migration is malformed, fix it and nuke database - -#### 4. API Returns Unexpected Response - -**Symptom**: Client shows errors but API appears to work - -**Debugging**: - -1. Test API directly with curl: `curl -X POST 'https://abaci.one/api/arcade/rooms' -H 'Content-Type: application/json' -d '...'` -2. Check production logs for errors -3. Verify container is running latest image: - ```bash - ssh nas.home.network "/usr/local/bin/docker inspect soroban-abacus-flashcards --format '{{.Created}}'" - ``` -4. Compare with commit timestamp: `git log --format="%ci" -1` - ## Environment Variables Production environment variables are stored in `.env` file on the server and loaded via `env_file:` in docker-compose.yaml. -**Critical advantage**: compose-updater re-reads the `.env` file on every update, so environment variable changes are automatically picked up without manual intervention. 
- Common variables: - `AUTH_URL` - Base URL (https://abaci.one) @@ -298,8 +231,8 @@ To update environment variables: # Edit .env file on NAS ssh nas.home.network "vi /volume1/homes/antialias/projects/abaci.one/.env" -# Restart compose-updater (will pick up new .env on next cycle) -ssh nas.home.network "cd /volume1/homes/antialias/projects/abaci.one && docker-compose -f docker-compose.updater.yaml restart" +# Restart containers to pick up changes +ssh nas.home.network "cd /volume1/homes/antialias/projects/abaci.one && docker-compose restart" ``` ## Network Configuration @@ -308,6 +241,7 @@ ssh nas.home.network "cd /volume1/homes/antialias/projects/abaci.one && docker-c - **HTTPS**: Automatic via Traefik with Let's Encrypt - **Domain**: abaci.one - **Exposed Port**: 3000 (internal to Docker network) +- **Load Balancing**: Traefik routes to both containers, health checks determine eligibility ## Security Notes @@ -315,3 +249,4 @@ ssh nas.home.network "cd /volume1/homes/antialias/projects/abaci.one && docker-c - SSH access is restricted to local network only - Docker container runs with appropriate user permissions - Secrets are managed via environment variables, not committed to repo +- Health check endpoint doesn't expose sensitive data diff --git a/apps/web/src/app/api/health/route.ts b/apps/web/src/app/api/health/route.ts new file mode 100644 index 00000000..4debdc96 --- /dev/null +++ b/apps/web/src/app/api/health/route.ts @@ -0,0 +1,69 @@ +/** + * Health check endpoint for deployment orchestration + * + * GET /api/health + * + * Returns 200 OK when the application is ready to serve traffic: + * - Database connection is working + * - All critical services are initialized + * + * Used by: + * - Red-black deployment scripts to verify new containers are ready + * - Docker healthcheck for container orchestration + * - Load balancers to determine if traffic should be routed + */ + +import { NextResponse } from 'next/server' +import { sql } from 'drizzle-orm' +import { 
db } from '@/db' + +export const dynamic = 'force-dynamic' + +interface HealthCheckResult { + status: 'healthy' | 'unhealthy' + timestamp: string + checks: { + database: { + status: 'ok' | 'error' + latencyMs?: number + error?: string + } + } + version?: string + commit?: string +} + +export async function GET(): Promise<NextResponse<HealthCheckResult>> { + const startTime = Date.now() // NOTE(review): never read below - candidate for removal + const result: HealthCheckResult = { + status: 'healthy', + timestamp: new Date().toISOString(), + checks: { + database: { status: 'ok' }, + }, + } + + // Check database connectivity with a trivial query; a thrown error marks the instance unhealthy (503) + try { + const dbStart = Date.now() + await db.run(sql`SELECT 1`) + result.checks.database.latencyMs = Date.now() - dbStart + } catch (error) { + result.status = 'unhealthy' + result.checks.database = { + status: 'error', + error: error instanceof Error ? error.message : 'Unknown database error', + } + } + + // Add version info from environment (set during Docker build) + if (process.env.GIT_COMMIT) { + result.commit = process.env.GIT_COMMIT + } + if (process.env.npm_package_version) { + result.version = process.env.npm_package_version + } + + const statusCode = result.status === 'healthy' ? 200 : 503 + return NextResponse.json(result, { status: statusCode }) +} diff --git a/nas-deployment/deploy.sh b/nas-deployment/deploy.sh index bc5853fa..73f2e5b7 100755 --- a/nas-deployment/deploy.sh +++ b/nas-deployment/deploy.sh @@ -1,46 +1,162 @@ #!/bin/bash +# +# Migration/Manual Deployment Script for Soroban Abacus Flashcards +# +# This script handles: +# 1. Migration from single-container to blue-green setup +# 2. 
Manual deployments when you don't want to wait for compose-updater +# +# Usage: +# ./deploy.sh # Deploy/migrate to blue-green setup +# -# Soroban Abacus Flashcards - NAS Deployment Script -# This script deploys the monorepo's apps/web to abaci.one +set -euo pipefail -set -e +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +NAS_HOST="${NAS_HOST:-nas.home.network}" +NAS_PATH="${NAS_PATH:-/volume1/homes/antialias/projects/abaci.one}" +IMAGE="ghcr.io/antialias/soroban-abacus-flashcards" -NAS_HOST="nas.home.network" -NAS_PATH="/volume1/homes/antialias/projects/abaci.one" -LOCAL_DIR="$(dirname "$0")" +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' # No Color -echo "🚀 Deploying Soroban Abacus Flashcards to NAS..." +log() { + echo -e "${GREEN}[deploy]${NC} $1" +} -# Stop existing services -echo "📦 Stopping existing services..." -ssh "$NAS_HOST" "cd '$NAS_PATH' && docker-compose down || true" +warn() { + echo -e "${YELLOW}[deploy]${NC} $1" +} -# Copy deployment files -echo "📁 Copying deployment files..." -scp "$LOCAL_DIR/docker-compose.yaml" "$NAS_HOST:$NAS_PATH/" -scp "$LOCAL_DIR/.env" "$NAS_HOST:$NAS_PATH/" 2>/dev/null || echo "⚠️ No .env file found locally - using existing on NAS" +error() { + echo -e "${RED}[deploy]${NC} $1" >&2 +} -# Ensure DDNS config is in place (only if it doesn't exist) -ssh "$NAS_HOST" "mkdir -p '$NAS_PATH/ddns-data'" -scp "$LOCAL_DIR/ddns-data/ddns-config.json" "$NAS_HOST:$NAS_PATH/ddns-data/" 2>/dev/null || echo "ℹ️ Using existing DDNS config" +# Run a command on the NAS via SSH +# Synology NAS needs /usr/local/bin in PATH for docker commands +nas_exec() { + ssh "$NAS_HOST" "export PATH=/usr/local/bin:\$PATH && cd '$NAS_PATH' && $1" +} -# Create required directories -echo "📂 Creating required directories..." 
-ssh "$NAS_HOST" "cd '$NAS_PATH' && mkdir -p public data uploads" +# Print "yes" if the old single-container setup (container soroban-abacus-flashcards) exists on the NAS, else "no" +check_needs_migration() { + if nas_exec "docker ps -a --format '{{.Names}}' | grep -q '^soroban-abacus-flashcards$'"; then + echo "yes" + else + echo "no" + fi +} -# Pull latest image and start services -echo "🐳 Starting services..." -ssh "$NAS_HOST" "cd '$NAS_PATH' && docker-compose pull && docker-compose up -d" +# Migrate from single-container to blue-green setup: stop and remove the old container (failures are ignored) +migrate_to_blue_green() { + log "Migrating from single-container to blue-green setup..." -# Show status -echo "✅ Deployment complete!" -echo "" -echo "🌐 Services:" -echo " - Soroban Flashcards: https://abaci.one" -echo " - DDNS Web UI: http://$(ssh "$NAS_HOST" "hostname -I | awk '{print \$1}'"):8000" -echo "" -echo "📊 Check status:" -echo " ssh $NAS_HOST 'cd $NAS_PATH && docker-compose ps'" -echo "" -echo "📝 View logs:" -echo " ssh $NAS_HOST 'cd $NAS_PATH && docker-compose logs -f soroban-abacus-flashcards'" \ No newline at end of file + # Stop old container + warn "Stopping old container..." + nas_exec "docker stop soroban-abacus-flashcards 2>/dev/null || true" + nas_exec "docker rm soroban-abacus-flashcards 2>/dev/null || true" + + log "Migration complete." +} + +# Sync deployment files to NAS: generate blue/green compose files if missing, copy compose files, create data dirs +sync_files() { + log "Syncing deployment files to NAS..." + + # Check if blue/green files exist (need to be generated) + if [[ ! -f "$SCRIPT_DIR/docker-compose.blue.yaml" ]] || [[ ! -f "$SCRIPT_DIR/docker-compose.green.yaml" ]]; then + warn "Blue/green compose files not found. Generating..."
+ if [[ -x "$SCRIPT_DIR/generate-compose.sh" ]]; then + "$SCRIPT_DIR/generate-compose.sh" + else + error "generate-compose.sh not found or not executable" + error "Run: ./generate-compose.sh (requires yq)" + exit 1 + fi + fi + + # Copy all compose files (use -O for legacy SCP protocol - required by Synology NAS) + scp -O "$SCRIPT_DIR/docker-compose.yaml" "$NAS_HOST:$NAS_PATH/" + scp -O "$SCRIPT_DIR/docker-compose.blue.yaml" "$NAS_HOST:$NAS_PATH/" + scp -O "$SCRIPT_DIR/docker-compose.green.yaml" "$NAS_HOST:$NAS_PATH/" + + # NEVER overwrite production .env automatically - it contains secrets + # Set the environment variable SYNC_ENV=true to explicitly copy .env (dangerous!) - there is no command-line flag + if [[ "${SYNC_ENV:-}" == "true" ]]; then + if [[ -f "$SCRIPT_DIR/.env" ]]; then + warn "SYNC_ENV=true - copying local .env to NAS (overwrites production secrets!)" + scp -O "$SCRIPT_DIR/.env" "$NAS_HOST:$NAS_PATH/" + else warn "SYNC_ENV=true but $SCRIPT_DIR/.env does not exist - nothing copied"; fi + else + log "Keeping existing .env on NAS (use SYNC_ENV=true to overwrite)" + fi + + # Ensure directories exist + nas_exec "mkdir -p public data uploads ddns-data" +} + +# Main deployment logic: sync files, migrate the old container if present, pull, start, then report status +main() { + log "==========================================" + log "Deploying abaci.one (Blue-Green)" + log "==========================================" + log "NAS host: $NAS_HOST" + echo "" + + # Sync deployment files + sync_files + echo "" + + # Check if we need to migrate from old setup + local needs_migration + needs_migration=$(check_needs_migration) + if [[ "$needs_migration" == "yes" ]]; then + warn "Detected old single-container setup" + migrate_to_blue_green + echo "" + fi + + # Pull latest image and start containers + log "Pulling latest image..." + nas_exec "docker-compose pull" + echo "" + + log "Starting containers..." + nas_exec "docker-compose up -d" + echo "" + + # Wait a moment for containers to start (less than the compose healthcheck start_period of 30s, so probes below may still fail) + log "Waiting for containers to start..."
+ sleep 5 + + # Check status + log "Container status:" + nas_exec "docker ps --format 'table {{.Names}}\t{{.Status}}' | grep -E '(NAMES|abaci)'" + echo "" + + # Test health endpoint (NOTE(review): relies on curl being present in the image - confirm; a missing curl also reports FAIL) + log "Testing health endpoint..." + local health_blue health_green + health_blue=$(nas_exec "docker exec abaci-blue curl -sf http://localhost:3000/api/health 2>/dev/null && echo 'OK' || echo 'FAIL'") + health_green=$(nas_exec "docker exec abaci-green curl -sf http://localhost:3000/api/health 2>/dev/null && echo 'OK' || echo 'FAIL'") + + log " abaci-blue: $health_blue" + log " abaci-green: $health_green" + echo "" + + log "==========================================" + log "Deployment complete!" + log "==========================================" + log "Site: https://abaci.one" + log "Health: https://abaci.one/api/health" + if [[ "$health_blue" == *OK && "$health_green" == *OK ]]; then log "Both containers are healthy. Traefik load balances between them based on health checks." + else warn "At least one health probe FAILED (containers may still be starting) - verify with 'docker ps' on the NAS." + fi + log "==========================================" +} + +# Run main +main "$@" diff --git a/nas-deployment/docker-compose.blue.yaml b/nas-deployment/docker-compose.blue.yaml new file mode 100644 index 00000000..c32e7c34 --- /dev/null +++ b/nas-deployment/docker-compose.blue.yaml @@ -0,0 +1,45 @@ +version: "3.8" +services: + blue: + image: ghcr.io/antialias/soroban-abacus-flashcards:latest + restart: unless-stopped + env_file: + - .env + volumes: + - ./public:/app/public + - ./data:/app/apps/web/data + - ./uploads:/app/uploads + networks: + - webgateway + healthcheck: + test: ["CMD", "node", "-e", "require('http').get('http://localhost:3000/', r => process.exit(r.statusCode < 400 ?
0 : 1)).on('error', () => process.exit(1))"] + interval: 10s + timeout: 5s + retries: 3 + start_period: 30s + container_name: abaci-blue + labels: + traefik.enable: "true" + traefik.http.routers.abaci.rule: "Host(`abaci.one`)" + traefik.http.routers.abaci.entrypoints: websecure + traefik.http.routers.abaci.tls: "true" + traefik.http.routers.abaci.tls.certresolver: myresolver + traefik.http.routers.abaci.middlewares: hsts@docker + traefik.http.routers.abaci.service: abaci + traefik.http.routers.abaci-http.rule: "Host(`abaci.one`)" + traefik.http.routers.abaci-http.entrypoints: web + traefik.http.routers.abaci-http.middlewares: redirect-https@docker + traefik.http.services.abaci.loadbalancer.server.port: "3000" + traefik.http.services.abaci.loadbalancer.healthcheck.path: / + traefik.http.services.abaci.loadbalancer.healthcheck.interval: 10s + traefik.http.middlewares.redirect-https.redirectscheme.scheme: https + traefik.http.middlewares.redirect-https.redirectscheme.permanent: "true" + traefik.http.middlewares.hsts.headers.stsSeconds: "63072000" + traefik.http.middlewares.hsts.headers.stsIncludeSubdomains: "true" + traefik.http.middlewares.hsts.headers.stsPreload: "true" + docker-compose-watcher.watch: "1" + docker-compose-watcher.dir: /volume1/homes/antialias/projects/abaci.one + docker-compose-watcher.file: docker-compose.blue.yaml +networks: + webgateway: + external: true diff --git a/nas-deployment/docker-compose.green.yaml b/nas-deployment/docker-compose.green.yaml new file mode 100644 index 00000000..1854e6d2 --- /dev/null +++ b/nas-deployment/docker-compose.green.yaml @@ -0,0 +1,45 @@ +version: "3.8" +services: + green: + image: ghcr.io/antialias/soroban-abacus-flashcards:latest + restart: unless-stopped + env_file: + - .env + volumes: + - ./public:/app/public + - ./data:/app/apps/web/data + - ./uploads:/app/uploads + networks: + - webgateway + healthcheck: + test: ["CMD", "node", "-e", "require('http').get('http://localhost:3000/', r => 
process.exit(r.statusCode < 400 ? 0 : 1)).on('error', () => process.exit(1))"] + interval: 10s + timeout: 5s + retries: 3 + start_period: 30s + container_name: abaci-green + labels: + traefik.enable: "true" + traefik.http.routers.abaci.rule: "Host(`abaci.one`)" + traefik.http.routers.abaci.entrypoints: websecure + traefik.http.routers.abaci.tls: "true" + traefik.http.routers.abaci.tls.certresolver: myresolver + traefik.http.routers.abaci.middlewares: hsts@docker + traefik.http.routers.abaci.service: abaci + traefik.http.routers.abaci-http.rule: "Host(`abaci.one`)" + traefik.http.routers.abaci-http.entrypoints: web + traefik.http.routers.abaci-http.middlewares: redirect-https@docker + traefik.http.services.abaci.loadbalancer.server.port: "3000" + traefik.http.services.abaci.loadbalancer.healthcheck.path: / + traefik.http.services.abaci.loadbalancer.healthcheck.interval: 10s + traefik.http.middlewares.redirect-https.redirectscheme.scheme: https + traefik.http.middlewares.redirect-https.redirectscheme.permanent: "true" + traefik.http.middlewares.hsts.headers.stsSeconds: "63072000" + traefik.http.middlewares.hsts.headers.stsIncludeSubdomains: "true" + traefik.http.middlewares.hsts.headers.stsPreload: "true" + docker-compose-watcher.watch: "1" + docker-compose-watcher.dir: /volume1/homes/antialias/projects/abaci.one + docker-compose-watcher.file: docker-compose.green.yaml +networks: + webgateway: + external: true diff --git a/nas-deployment/docker-compose.yaml b/nas-deployment/docker-compose.yaml index cf92d94e..5db972b6 100644 --- a/nas-deployment/docker-compose.yaml +++ b/nas-deployment/docker-compose.yaml @@ -1,49 +1,70 @@ version: "3.8" +# Main docker-compose file - source of truth +# +# Run `./generate-compose.sh` to generate docker-compose.blue.yaml and +# docker-compose.green.yaml for compose-updater (requires yq). +# +# compose-updater needs separate files so it restarts containers independently, +# giving us zero-downtime deployments. 
+ +x-app: &app + image: ghcr.io/antialias/soroban-abacus-flashcards:latest + restart: unless-stopped + env_file: + - .env + volumes: + - ./public:/app/public + - ./data:/app/apps/web/data + - ./uploads:/app/uploads + networks: + - webgateway + healthcheck: + test: ["CMD", "node", "-e", "require('http').get('http://localhost:3000/', r => process.exit(r.statusCode < 400 ? 0 : 1)).on('error', () => process.exit(1))"] + interval: 10s + timeout: 5s + retries: 3 + start_period: 30s + +x-traefik-labels: &traefik-labels + traefik.enable: "true" + traefik.http.routers.abaci.rule: "Host(`abaci.one`)" + traefik.http.routers.abaci.entrypoints: websecure + traefik.http.routers.abaci.tls: "true" + traefik.http.routers.abaci.tls.certresolver: myresolver + traefik.http.routers.abaci.middlewares: hsts@docker + traefik.http.routers.abaci.service: abaci + traefik.http.routers.abaci-http.rule: "Host(`abaci.one`)" + traefik.http.routers.abaci-http.entrypoints: web + traefik.http.routers.abaci-http.middlewares: redirect-https@docker + traefik.http.services.abaci.loadbalancer.server.port: "3000" + traefik.http.services.abaci.loadbalancer.healthcheck.path: / + traefik.http.services.abaci.loadbalancer.healthcheck.interval: 10s + traefik.http.middlewares.redirect-https.redirectscheme.scheme: https + traefik.http.middlewares.redirect-https.redirectscheme.permanent: "true" + traefik.http.middlewares.hsts.headers.stsSeconds: "63072000" + traefik.http.middlewares.hsts.headers.stsIncludeSubdomains: "true" + traefik.http.middlewares.hsts.headers.stsPreload: "true" + services: - # ──────────────────────────────────── - # Soroban Abacus Flashcards Web App (apps/web) - # ──────────────────────────────────── - soroban-abacus-flashcards: - image: ghcr.io/antialias/soroban-abacus-flashcards:latest - container_name: soroban-abacus-flashcards - restart: unless-stopped - env_file: - - .env - volumes: - - ./public:/app/public - - ./data:/app/apps/web/data - - ./uploads:/app/uploads + blue: + <<: *app + 
container_name: abaci-blue labels: - # ── Traefik Routers ─────────────────────────────────── - # HTTPS router - - "traefik.enable=true" - - "traefik.http.routers.abaci.rule=Host(`abaci.one`)" - - "traefik.http.routers.abaci.entrypoints=websecure" - - "traefik.http.routers.abaci.tls=true" - - "traefik.http.routers.abaci.tls.certresolver=myresolver" - - "traefik.http.routers.abaci.middlewares=hsts@docker" + <<: *traefik-labels + docker-compose-watcher.watch: "1" + docker-compose-watcher.dir: /volume1/homes/antialias/projects/abaci.one + docker-compose-watcher.file: docker-compose.blue.yaml - # HTTP → HTTPS redirect router - - "traefik.http.routers.abaci-http.rule=Host(`abaci.one`)" - - "traefik.http.routers.abaci-http.entrypoints=web" - - "traefik.http.routers.abaci-http.middlewares=redirect-https@docker" + green: + <<: *app + container_name: abaci-green + labels: + <<: *traefik-labels + docker-compose-watcher.watch: "1" + docker-compose-watcher.dir: /volume1/homes/antialias/projects/abaci.one + docker-compose-watcher.file: docker-compose.green.yaml - # ── Abaci service definition - - "traefik.http.services.abaci.loadbalancer.server.port=3000" - - # ── Shared middlewares - - "traefik.http.middlewares.redirect-https.redirectscheme.scheme=https" - - "traefik.http.middlewares.redirect-https.redirectscheme.permanent=true" - - "traefik.http.middlewares.hsts.headers.stsSeconds=63072000" - - "traefik.http.middlewares.hsts.headers.stsIncludeSubdomains=true" - - "traefik.http.middlewares.hsts.headers.stsPreload=true" - networks: - - webgateway - - # ──────────────────────────────────── - # DDNS Updater (Porkbun for abaci.one) - # ──────────────────────────────────── ddns-updater: image: qmcgaw/ddns-updater:latest container_name: ddns-updater @@ -52,30 +73,11 @@ services: environment: - TZ=America/Chicago ports: - - "8000:8000" # optional web UI + - "8000:8000" restart: unless-stopped networks: - webgateway - # ──────────────────────────────────── - # Watchtower (auto-update) 
- # ──────────────────────────────────── - watchtower: - image: containrrr/watchtower - container_name: watchtower - restart: unless-stopped - volumes: - - /var/run/docker.sock:/var/run/docker.sock - command: --interval 300 soroban-abacus-flashcards ddns-updater - environment: - - WATCHTOWER_CLEANUP=true - - WATCHTOWER_ROLLING_RESTART=true - networks: - - webgateway - -# ────────────────────────────────────── -# Networks & Volumes -# ────────────────────────────────────── networks: webgateway: - external: true # same network Traefik lives on + external: true diff --git a/nas-deployment/generate-compose.sh b/nas-deployment/generate-compose.sh new file mode 100755 index 00000000..b2ec9e40 --- /dev/null +++ b/nas-deployment/generate-compose.sh @@ -0,0 +1,48 @@ +#!/bin/bash +# +# Generate docker-compose.blue.yaml and docker-compose.green.yaml +# from the main docker-compose.yaml +# +# compose-updater needs separate files to restart containers independently. +# This script extracts each service into its own file. +# + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +cd "$SCRIPT_DIR" + +# Check for yq (diagnostics go to stderr) +if ! command -v yq &> /dev/null; then + echo "Error: yq is required but not installed." >&2 + echo "Install with: brew install yq" >&2 + exit 1 +fi +# Each file is written to a .tmp name and moved into place, so a failed yq run never truncates the real compose file +echo "Generating docker-compose.blue.yaml..." +yq eval --yaml-fix-merge-anchor-to-spec ' + explode(.) | + { + "version": .version, + "services": {"blue": .services.blue}, + "networks": .networks + } +' docker-compose.yaml > docker-compose.blue.yaml.tmp; mv -f -- docker-compose.blue.yaml.tmp docker-compose.blue.yaml + +echo "Generating docker-compose.green.yaml..." +yq eval --yaml-fix-merge-anchor-to-spec ' + explode(.) | + { + "version": .version, + "services": {"green": .services.green}, + "networks": .networks + } +' docker-compose.yaml > docker-compose.green.yaml.tmp; mv -f -- docker-compose.green.yaml.tmp docker-compose.green.yaml + +echo "Done!"
+echo "" +echo "Generated files:" +ls -la docker-compose.blue.yaml docker-compose.green.yaml +echo "" +echo "Verify no secrets were included:" +if grep -l "sk-" docker-compose.*.yaml 2>/dev/null; then echo "WARNING: Found secrets!" >&2; else echo "OK - no secrets found"; fi