diff --git a/.claude/settings.local.json b/.claude/settings.local.json index fe282420..539b2bc4 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -568,7 +568,8 @@ "Bash(scripts/train-column-classifier/.venv/bin/python:*)", "Bash(brew list:*)", "Bash(scripts/train-column-classifier/.venv/bin/pip index:*)", - "Bash(scripts/train-column-classifier/.venv/bin/pip:*)" + "Bash(scripts/train-column-classifier/.venv/bin/pip:*)", + "WebFetch(domain:pypi.org)" ], "deny": [], "ask": [] diff --git a/Dockerfile b/Dockerfile index 54214f83..facb2233 100644 --- a/Dockerfile +++ b/Dockerfile @@ -110,9 +110,11 @@ FROM node:20-slim AS runner WORKDIR /app # Install ONLY runtime dependencies (no build tools) +# python3-venv is needed for creating virtual environments for ML training RUN apt-get update && apt-get install -y --no-install-recommends \ python3 \ python3-pip \ + python3-venv \ qpdf \ ca-certificates \ && rm -rf /var/lib/apt/lists/* @@ -154,6 +156,9 @@ COPY --from=builder --chown=nextjs:nodejs /app/packages/templates ./packages/tem # Copy abacus-react package (needed for calendar generation scripts) COPY --from=builder --chown=nextjs:nodejs /app/packages/abacus-react ./packages/abacus-react +# Copy ML training scripts (for vision model training) +COPY --from=builder --chown=nextjs:nodejs /app/apps/web/scripts/train-column-classifier ./apps/web/scripts/train-column-classifier + # Install Python dependencies for flashcard generation RUN pip3 install --no-cache-dir --break-system-packages -r packages/core/requirements.txt @@ -164,8 +169,8 @@ COPY --from=builder --chown=nextjs:nodejs /app/apps/web/package.json ./apps/web/ # Set up environment WORKDIR /app/apps/web -# Create data directory for SQLite database and uploads -RUN mkdir -p data/uploads && chown -R nextjs:nodejs data +# Create data directory for SQLite database, uploads, and vision training +RUN mkdir -p data/uploads data/vision-training/collected data/vision-training/.venv && chown -R nextjs:nodejs data USER nextjs EXPOSE 3000 diff --git a/apps/web/.gitignore b/apps/web/.gitignore index 7ee19892..86f96921 100644 --- a/apps/web/.gitignore +++ b/apps/web/.gitignore @@ -64,4 +64,5 @@ data/uploads/ # ML training data training-data/ data/vision-training/collected/ +data/vision-training/.venv/ scripts/train-column-classifier/.venv/ diff --git a/apps/web/src/app/api/vision-training/config.ts b/apps/web/src/app/api/vision-training/config.ts index b1d5f4e5..f51e80d6 100644 --- a/apps/web/src/app/api/vision-training/config.ts +++ b/apps/web/src/app/api/vision-training/config.ts @@ -17,30 +17,27 @@ const cwd = process.cwd() /** * Check if the current platform supports TensorFlow training. - * TensorFlow doesn't have wheels for all platforms (e.g., ARM-based NAS devices). + * TensorFlow has wheels for: + * - macOS x86_64 and arm64 (Apple Silicon with tensorflow-macos) + * - Linux x86_64 and aarch64 + * - Windows x86_64 */ export function isPlatformSupported(): { supported: boolean; reason?: string } { const platform = process.platform const arch = process.arch - // TensorFlow supports: - // - macOS on x86_64 and arm64 (Apple Silicon with tensorflow-macos) - // - Linux on x86_64 (and some arm64 builds) - // - Windows on x86_64 - if (platform === 'darwin') { // macOS - both Intel and Apple Silicon are supported return { supported: true } } if (platform === 'linux') { - if (arch === 'x64') { + if (arch === 'x64' || arch === 'arm64') { return { supported: true } } - // ARM Linux (like Synology NAS) typically doesn't have TensorFlow wheels return { supported: false, - reason: `TensorFlow is not available for Linux ${arch}. Training should be done on a machine with x86_64 or Apple Silicon.`, + reason: `TensorFlow is not available for Linux ${arch}. Training requires x86_64 or ARM64.`, } } @@ -50,7 +47,7 @@ export function isPlatformSupported(): { supported: boolean; reason?: string } { return { supported: false, - reason: `TensorFlow is not available for ${platform} ${arch}. Training should be done on macOS, Linux x86_64, or Windows x86_64.`, + reason: `TensorFlow is not available for ${platform} ${arch}. Training should be done on macOS, Linux (x86_64/ARM64), or Windows x86_64.`, } } @@ -61,8 +58,12 @@ export const TRAINING_SCRIPTS_DIR = path.join(cwd, 'scripts/train-column-classif /** * Path to the venv directory + * We use data/vision-training/.venv because: + * 1. The data/ directory is mounted as a volume in Docker (persists across restarts) + * 2. It's writable by the container + * 3. Scripts directory may not exist in production Docker images */ -const VENV_DIR = path.join(TRAINING_SCRIPTS_DIR, '.venv') +const VENV_DIR = path.join(cwd, 'data/vision-training/.venv') /** * Path to the Python executable in the venv