feat(vision): enhance quad detection with Hough lines and multi-strategy preprocessing

- Add Hough line detection for improved edge finding under finger occlusion
- Implement multi-strategy preprocessing (standard, enhanced, adaptive, multi)
- Add configurable parameters for Canny thresholds, adaptive threshold, morph gradient
- Refactor useDocumentDetection hook with cleaner API
- Add OpenCV type definitions and async loading improvements
- Add loader test pages for debugging OpenCV initialization
- Add quad-test page for interactive detection testing
- Add document detection research notes

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Thomas Hallock 2026-01-12 11:16:06 -06:00
parent bc02ba281d
commit 93a25c1e7b
29 changed files with 3091 additions and 637 deletions

View File

@ -641,7 +641,16 @@
"Bash(if [ -f /Users/antialias/projects/soroban-abacus-flashcards/apps/web/data/vision-training/collected/.deleted ])",
"Bash(then wc -l /Users/antialias/projects/soroban-abacus-flashcards/apps/web/data/vision-training/collected/.deleted)",
"Bash(else echo \"File does not exist\")",
"Bash(fi)"
"Bash(fi)",
"WebFetch(domain:docs.opencv.org)",
"mcp__chrome-devtools__new_page",
"mcp__chrome-devtools__close_page",
"WebFetch(domain:www.npmjs.com)",
"Bash(git branch:*)",
"WebFetch(domain:scanbot.io)",
"WebFetch(domain:learnopencv.com)",
"WebFetch(domain:news.ycombinator.com)",
"Bash(npm run typecheck:*)"
],
"deny": [],
"ask": []

View File

@ -339,8 +339,8 @@ export function NavSyncIndicator({ sync }: NavSyncIndicatorProps) {
</div>
{sync.status?.local && sync.status?.remote && (
<div className={css({ color: 'gray.400', fontSize: 'xs' })}>
Local: {sync.status.local.totalImages?.toLocaleString() || 0} {' '}
Remote: {sync.status.remote.totalImages?.toLocaleString() || 0}
Local: {sync.status.local.totalImages?.toLocaleString() || 0} Remote:{' '}
{sync.status.remote.totalImages?.toLocaleString() || 0}
</div>
)}
</div>

View File

@ -118,7 +118,9 @@ export function useSyncStatus(modelType: ModelType): UseSyncStatusResult {
const refreshHistory = useCallback(async () => {
setHistoryLoading(true)
try {
const response = await fetch(`/api/vision-training/sync/history?modelType=${modelType}&limit=5`)
const response = await fetch(
`/api/vision-training/sync/history?modelType=${modelType}&limit=5`
)
if (response.ok) {
const data = await response.json()
setHistory(data.history || [])

View File

@ -77,7 +77,8 @@ export default function LoaderTestAsyncPage() {
Status:{' '}
<span
className={css({
color: status === 'success' ? 'green.400' : status === 'error' ? 'red.400' : 'gray.400',
color:
status === 'success' ? 'green.400' : status === 'error' ? 'red.400' : 'gray.400',
})}
>
{status}

View File

@ -80,7 +80,8 @@ export default function LoaderTestBarePage() {
Status:{' '}
<span
className={css({
color: status === 'success' ? 'green.400' : status === 'error' ? 'red.400' : 'gray.400',
color:
status === 'success' ? 'green.400' : status === 'error' ? 'red.400' : 'gray.400',
})}
>
{status}

View File

@ -32,9 +32,7 @@ export default function LoaderTestCheckPage() {
gap: 4,
})}
>
<h1 className={css({ fontSize: '2xl', fontWeight: 'bold' })}>
Check window.cv (No Loading)
</h1>
<h1 className={css({ fontSize: '2xl', fontWeight: 'bold' })}>Check window.cv (No Loading)</h1>
<p className={css({ color: 'gray.400', mb: 4 })}>
Just checks if window.cv exists - no loading.
</p>

View File

@ -47,9 +47,7 @@ export default function LoaderTestDirectPage() {
gap: 4,
})}
>
<h1 className={css({ fontSize: '2xl', fontWeight: 'bold' })}>
Direct Import Test
</h1>
<h1 className={css({ fontSize: '2xl', fontWeight: 'bold' })}>Direct Import Test</h1>
<p className={css({ color: 'gray.400', mb: 4 })}>
Imports directly from loader.ts (not barrel index.ts).
</p>
@ -80,7 +78,8 @@ export default function LoaderTestDirectPage() {
Status:{' '}
<span
className={css({
color: status === 'success' ? 'green.400' : status === 'error' ? 'red.400' : 'gray.400',
color:
status === 'success' ? 'green.400' : status === 'error' ? 'red.400' : 'gray.400',
})}
>
{status}

View File

@ -49,9 +49,7 @@ export default function LoaderTestHookCustomPage() {
gap: 4,
})}
>
<h1 className={css({ fontSize: '2xl', fontWeight: 'bold' })}>
Custom Hook Test
</h1>
<h1 className={css({ fontSize: '2xl', fontWeight: 'bold' })}>Custom Hook Test</h1>
<p className={css({ color: 'gray.400', mb: 4 })}>
Uses custom useOpenCV hook from separate file.
</p>
@ -82,7 +80,8 @@ export default function LoaderTestHookCustomPage() {
Status:{' '}
<span
className={css({
color: status === 'success' ? 'green.400' : status === 'error' ? 'red.400' : 'gray.400',
color:
status === 'success' ? 'green.400' : status === 'error' ? 'red.400' : 'gray.400',
})}
>
{status}

View File

@ -82,7 +82,8 @@ export default function LoaderTestHookPage() {
Status:{' '}
<span
className={css({
color: status === 'success' ? 'green.400' : status === 'error' ? 'red.400' : 'gray.400',
color:
status === 'success' ? 'green.400' : status === 'error' ? 'red.400' : 'gray.400',
})}
>
{status}

View File

@ -132,9 +132,7 @@ export default function LoaderTestInlinePage() {
gap: 4,
})}
>
<h1 className={css({ fontSize: '2xl', fontWeight: 'bold' })}>
Inline Loader Test
</h1>
<h1 className={css({ fontSize: '2xl', fontWeight: 'bold' })}>Inline Loader Test</h1>
<p className={css({ color: 'gray.400', mb: 4 })}>
Loader code is INLINE in this component (not imported from module).
</p>
@ -165,7 +163,8 @@ export default function LoaderTestInlinePage() {
Status:{' '}
<span
className={css({
color: status === 'success' ? 'green.400' : status === 'error' ? 'red.400' : 'gray.400',
color:
status === 'success' ? 'green.400' : status === 'error' ? 'red.400' : 'gray.400',
})}
>
{status}

View File

@ -41,9 +41,7 @@ export default function LoaderTestScriptPage() {
gap: 4,
})}
>
<h1 className={css({ fontSize: '2xl', fontWeight: 'bold' })}>
Script Tag Test (No Waiting)
</h1>
<h1 className={css({ fontSize: '2xl', fontWeight: 'bold' })}>Script Tag Test (No Waiting)</h1>
<p className={css({ color: 'gray.400', mb: 4 })}>
Step 1: Add script tag. Step 2: Check if cv loaded.
</p>

View File

@ -79,7 +79,8 @@ export default function LoaderTestSimplePage() {
Status:{' '}
<span
className={css({
color: status === 'success' ? 'green.400' : status === 'error' ? 'red.400' : 'gray.400',
color:
status === 'success' ? 'green.400' : status === 'error' ? 'red.400' : 'gray.400',
})}
>
{status}

View File

@ -43,9 +43,7 @@ export default function LoaderTestV2Page() {
gap: 4,
})}
>
<h1 className={css({ fontSize: '2xl', fontWeight: 'bold' })}>
Loader V2 Test
</h1>
<h1 className={css({ fontSize: '2xl', fontWeight: 'bold' })}>Loader V2 Test</h1>
<p className={css({ color: 'gray.400', mb: 4 })}>
Uses new loaderV2.ts with proven working pattern.
</p>
@ -76,7 +74,8 @@ export default function LoaderTestV2Page() {
Status:{' '}
<span
className={css({
color: status === 'success' ? 'green.400' : status === 'error' ? 'red.400' : 'gray.400',
color:
status === 'success' ? 'green.400' : status === 'error' ? 'red.400' : 'gray.400',
})}
>
{status}

View File

@ -74,7 +74,8 @@ export default function LoaderTestV3Page() {
Status:{' '}
<span
className={css({
color: status === 'success' ? 'green.400' : status === 'error' ? 'red.400' : 'gray.400',
color:
status === 'success' ? 'green.400' : status === 'error' ? 'red.400' : 'gray.400',
})}
>
{status}

View File

@ -74,7 +74,8 @@ export default function LoaderTestV4Page() {
Status:{' '}
<span
className={css({
color: status === 'success' ? 'green.400' : status === 'error' ? 'red.400' : 'gray.400',
color:
status === 'success' ? 'green.400' : status === 'error' ? 'red.400' : 'gray.400',
})}
>
{status}

View File

@ -44,9 +44,7 @@ export default function LoaderTestV5Page() {
<h1 className={css({ fontSize: '2xl', fontWeight: 'bold' })}>
Loader V5 Test (No Internal Await)
</h1>
<p className={css({ color: 'gray.400', mb: 4 })}>
Returns Promise, consumer awaits it.
</p>
<p className={css({ color: 'gray.400', mb: 4 })}>Returns Promise, consumer awaits it.</p>
<button
type="button"
@ -74,7 +72,8 @@ export default function LoaderTestV5Page() {
Status:{' '}
<span
className={css({
color: status === 'success' ? 'green.400' : status === 'error' ? 'red.400' : 'gray.400',
color:
status === 'success' ? 'green.400' : status === 'error' ? 'red.400' : 'gray.400',
})}
>
{status}

View File

@ -46,9 +46,7 @@ export default function LoaderTestWaitPage() {
gap: 4,
})}
>
<h1 className={css({ fontSize: '2xl', fontWeight: 'bold' })}>
Wait Test (Imported Promise)
</h1>
<h1 className={css({ fontSize: '2xl', fontWeight: 'bold' })}>Wait Test (Imported Promise)</h1>
<p className={css({ color: 'gray.400', mb: 4 })}>
Adds script tag then waits with imported Promise function.
</p>
@ -79,7 +77,8 @@ export default function LoaderTestWaitPage() {
Status:{' '}
<span
className={css({
color: status === 'success' ? 'green.400' : status === 'error' ? 'red.400' : 'gray.400',
color:
status === 'success' ? 'green.400' : status === 'error' ? 'red.400' : 'gray.400',
})}
>
{status}

View File

@ -51,9 +51,7 @@ export default function LoaderTestWrappedPage() {
gap: 4,
})}
>
<h1 className={css({ fontSize: '2xl', fontWeight: 'bold' })}>
Wrapped Import Test
</h1>
<h1 className={css({ fontSize: '2xl', fontWeight: 'bold' })}>Wrapped Import Test</h1>
<p className={css({ color: 'gray.400', mb: 4 })}>
Imported loadOpenCV wrapped in useCallback.
</p>
@ -84,7 +82,8 @@ export default function LoaderTestWrappedPage() {
Status:{' '}
<span
className={css({
color: status === 'success' ? 'green.400' : status === 'error' ? 'red.400' : 'gray.400',
color:
status === 'success' ? 'green.400' : status === 'error' ? 'red.400' : 'gray.400',
})}
>
{status}

View File

@ -47,9 +47,7 @@ export default function LoaderTestPage() {
gap: 4,
})}
>
<h1 className={css({ fontSize: '2xl', fontWeight: 'bold' })}>
Minimal OpenCV Loader Test
</h1>
<h1 className={css({ fontSize: '2xl', fontWeight: 'bold' })}>Minimal OpenCV Loader Test</h1>
<p className={css({ color: 'gray.400', mb: 4 })}>
This page ONLY imports the standalone loader. No useDocumentDetection.
</p>
@ -80,7 +78,8 @@ export default function LoaderTestPage() {
Status:{' '}
<span
className={css({
color: status === 'success' ? 'green.400' : status === 'error' ? 'red.400' : 'gray.400',
color:
status === 'success' ? 'green.400' : status === 'error' ? 'red.400' : 'gray.400',
})}
>
{status}

File diff suppressed because it is too large

View File

@ -534,7 +534,9 @@ export function UnifiedDataPanel({ modelType, onDataChanged }: UnifiedDataPanelP
const handleStartSync = useCallback(async () => {
setSyncProgress({ phase: 'connecting', message: 'Connecting...' })
try {
const response = await fetch(`/api/vision-training/sync?modelType=${modelType}`, { method: 'POST' })
const response = await fetch(`/api/vision-training/sync?modelType=${modelType}`, {
method: 'POST',
})
if (response.ok) {
setSyncProgress({ phase: 'complete', message: 'Sync complete!' })
loadItems()

View File

@ -1,129 +1,33 @@
'use client'
import { useCallback, useRef, useState } from 'react'
import { useCallback, useEffect, useRef, useState } from 'react'
import {
createQuadDetector,
type DetectedQuad as ModularDetectedQuad,
type QuadDetectorConfig,
} from '@/lib/vision/quadDetector'
import { createQuadTracker, type TrackedQuad as ModularTrackedQuad } from '@/lib/vision/quadTracker'
import type { CV, CVMat } from '@/lib/vision/opencv/types'
// Re-export config type for consumers
export type { QuadDetectorConfig } from '@/lib/vision/quadDetector'
/**
* Hook for document detection using OpenCV.js directly
*
* Features:
* - Lazy loads OpenCV.js (~8MB) only when first used
* - Multi-quad tracking: detects ALL quadrilaterals, not just the largest
* - Uses modular quadDetector and quadTracker from @/lib/vision
* - Scores quads by: size, aspect ratio, and temporal stability
* - Filters out small quads (likely printed on page) vs page-sized quads
* - Provides highlightDocument for drawing detected quad on overlay
* - Provides extractDocument for cropping/deskewing captured image
*/
// OpenCV.js types (minimal interface for what we use)
interface CVMat {
delete: () => void
data32S: Int32Array
rows: number
cols: number
}
interface CVMatVector {
size: () => number
get: (i: number) => CVMat
delete: () => void
}
interface CVSize {
width: number
height: number
}
interface CVPoint {
x: number
y: number
}
interface CV {
Mat: new () => CVMat
MatVector: new () => CVMatVector
Size: new (w: number, h: number) => CVSize
Scalar: new (r?: number, g?: number, b?: number, a?: number) => unknown
imread: (canvas: HTMLCanvasElement) => CVMat
imshow: (canvas: HTMLCanvasElement, mat: CVMat) => void
cvtColor: (src: CVMat, dst: CVMat, code: number) => void
GaussianBlur: (
src: CVMat,
dst: CVMat,
size: CVSize,
sigmaX: number,
sigmaY: number,
borderType: number
) => void
Canny: (src: CVMat, dst: CVMat, t1: number, t2: number) => void
dilate: (src: CVMat, dst: CVMat, kernel: CVMat, anchor: CVPoint, iterations: number) => void
findContours: (
src: CVMat,
contours: CVMatVector,
hierarchy: CVMat,
mode: number,
method: number
) => void
contourArea: (contour: CVMat) => number
arcLength: (contour: CVMat, closed: boolean) => number
approxPolyDP: (contour: CVMat, approx: CVMat, epsilon: number, closed: boolean) => void
getPerspectiveTransform: (src: CVMat, dst: CVMat) => CVMat
warpPerspective: (
src: CVMat,
dst: CVMat,
M: CVMat,
size: CVSize,
flags: number,
borderMode: number,
borderValue: unknown
) => void
warpAffine: (
src: CVMat,
dst: CVMat,
M: CVMat,
size: CVSize,
flags?: number,
borderMode?: number,
borderValue?: unknown
) => void
getRotationMatrix2D: (center: CVPoint, angle: number, scale: number) => CVMat
rotate: (src: CVMat, dst: CVMat, rotateCode: number) => void
countNonZero: (src: CVMat) => number
matFromArray: (rows: number, cols: number, type: number, data: number[]) => CVMat
COLOR_RGBA2GRAY: number
BORDER_DEFAULT: number
RETR_LIST: number
CHAIN_APPROX_SIMPLE: number
CV_32FC2: number
INTER_LINEAR: number
BORDER_CONSTANT: number
ROTATE_90_CLOCKWISE: number
ROTATE_180: number
ROTATE_90_COUNTERCLOCKWISE: number
}
/** Represents a detected quadrilateral with corner points */
interface DetectedQuad {
corners: Array<{ x: number; y: number }>
area: number
aspectRatio: number
// Unique ID based on approximate center position
centerId: string
}
/** Tracked quad candidate with history */
interface TrackedQuad {
id: string
corners: Array<{ x: number; y: number }>
area: number
aspectRatio: number
/** How many frames this quad has been seen */
frameCount: number
/** Last frame number when this quad was seen */
lastSeenFrame: number
/** Stability score based on corner consistency */
stabilityScore: number
/** History of corner positions for stability calculation */
cornerHistory: Array<Array<{ x: number; y: number }>>
/** Internal tracked quad type for backward compatibility */
interface TrackedQuad extends ModularTrackedQuad {
/** History of corner positions for stability calculation (used by extractDocument) */
cornerHistory?: Array<Array<{ x: number; y: number }>>
}
export interface DocumentDetectionDebugInfo {
@ -143,28 +47,12 @@ export interface DocumentDetectionDebugInfo {
lastDetectionError: string | null
}
/** Number of frames to track quad history */
const HISTORY_LENGTH = 10
/** Minimum frames a quad must be seen to be considered stable */
const MIN_FRAMES_FOR_STABLE = 3
/** Minimum frames for "locked" state */
const LOCKED_FRAME_COUNT = 5
/** Maximum distance (as % of frame diagonal) for quads to be considered "same" */
const QUAD_MATCH_THRESHOLD = 0.08
/** Minimum area as % of frame for a quad to be considered page-sized */
const MIN_AREA_RATIO = 0.15
/** Maximum area as % of frame (filter out frame edges detected as quad) */
const MAX_AREA_RATIO = 0.95
/** Expected aspect ratios for documents (width/height) */
const EXPECTED_ASPECT_RATIOS = [
8.5 / 11, // US Letter portrait
11 / 8.5, // US Letter landscape
1 / Math.sqrt(2), // A4 portrait
Math.sqrt(2), // A4 landscape
1, // Square
]
/** How close aspect ratio must be to expected (tolerance) */
const ASPECT_RATIO_TOLERANCE = 0.3
/** Minimum stability score for locked state */
const MIN_STABILITY_FOR_LOCKED = 0.5
export interface DetectQuadsInImageResult {
/** Whether a document quad was detected */
@ -226,18 +114,44 @@ export interface UseDocumentDetectionReturn {
* Returns the canvas, or null if loading failed
*/
loadImageToCanvas: (file: File) => Promise<HTMLCanvasElement | null>
/**
* Reset all tracking state (call when returning from adjustment mode)
*/
resetTracking: () => void
/**
* Update detector configuration (recreates detector with new settings)
*/
updateDetectorConfig: (config: Partial<QuadDetectorConfig>) => void
/**
* Current detector configuration
*/
detectorConfig: Partial<QuadDetectorConfig>
}
export function useDocumentDetection(): UseDocumentDetectionReturn {
export interface UseDocumentDetectionOptions {
/** Initial detector configuration */
detectorConfig?: Partial<QuadDetectorConfig>
}
export function useDocumentDetection(
options?: UseDocumentDetectionOptions
): UseDocumentDetectionReturn {
// Start with isLoading=false since we won't load until requested
const [isLoading, setIsLoading] = useState(false)
const [error, setError] = useState<string | null>(null)
const cvRef = useRef<CV | null>(null)
const loadPromiseRef = useRef<Promise<void> | null>(null)
// Multi-quad tracking
const trackedQuadsRef = useRef<Map<string, TrackedQuad>>(new Map())
const frameCountRef = useRef(0)
// Detector configuration (can be updated dynamically)
const [detectorConfig, setDetectorConfig] = useState<Partial<QuadDetectorConfig>>(
options?.detectorConfig ?? {}
)
// Modular detector and tracker (created after OpenCV loads)
const detectorRef = useRef<ReturnType<typeof createQuadDetector> | null>(null)
const trackerRef = useRef<ReturnType<typeof createQuadTracker> | null>(null)
// Best quad tracking
const bestQuadRef = useRef<TrackedQuad | null>(null)
const lastStableFrameRef = useRef<HTMLCanvasElement | null>(null)
@ -348,6 +262,15 @@ export function useDocumentDetection(): UseDocumentDetectionReturn {
// Store OpenCV reference
cvRef.current = (window as unknown as { cv: CV }).cv
// Create modular detector and tracker with current config
detectorRef.current = createQuadDetector(cvRef.current, detectorConfig)
trackerRef.current = createQuadTracker({
minFramesForStable: MIN_FRAMES_FOR_STABLE,
minFramesForLocked: LOCKED_FRAME_COUNT,
minStabilityForLocked: MIN_STABILITY_FOR_LOCKED,
})
const loadTime = Date.now() - loadStartTimeRef.current
setDebugInfo((prev) => ({ ...prev, loadTimeMs: loadTime }))
setIsLoading(false)
@ -365,7 +288,19 @@ export function useDocumentDetection(): UseDocumentDetectionReturn {
} catch {
return false
}
}, [isOpenCVReady])
}, [isOpenCVReady, detectorConfig])
// Recreate detector when config changes (if OpenCV is already loaded)
useEffect(() => {
if (cvRef.current && detectorRef.current) {
detectorRef.current = createQuadDetector(cvRef.current, detectorConfig)
}
}, [detectorConfig])
// Update detector config function
const updateDetectorConfig = useCallback((newConfig: Partial<QuadDetectorConfig>) => {
setDetectorConfig((prev) => ({ ...prev, ...newConfig }))
}, [])
// Reusable canvas for video frame capture
const frameCanvasRef = useRef<HTMLCanvasElement | null>(null)
@ -391,7 +326,7 @@ export function useDocumentDetection(): UseDocumentDetectionReturn {
return frameCanvas
}, [])
// Calculate distance between two points
// Calculate distance between two points (kept for extractDocument)
const distance = useCallback(
(p1: { x: number; y: number }, p2: { x: number; y: number }): number => {
return Math.sqrt((p1.x - p2.x) ** 2 + (p1.y - p2.y) ** 2)
@ -399,298 +334,6 @@ export function useDocumentDetection(): UseDocumentDetectionReturn {
[]
)
// Order corners: top-left, top-right, bottom-right, bottom-left
const orderCorners = useCallback(
(corners: Array<{ x: number; y: number }>): Array<{ x: number; y: number }> => {
if (corners.length !== 4) return corners
// Find centroid
const cx = corners.reduce((s, c) => s + c.x, 0) / 4
const cy = corners.reduce((s, c) => s + c.y, 0) / 4
// Sort by angle from centroid
const sorted = [...corners].sort((a, b) => {
const angleA = Math.atan2(a.y - cy, a.x - cx)
const angleB = Math.atan2(b.y - cy, b.x - cx)
return angleA - angleB
})
// Find top-left (smallest x+y)
let topLeftIdx = 0
let minSum = Infinity
for (let i = 0; i < 4; i++) {
const sum = sorted[i].x + sorted[i].y
if (sum < minSum) {
minSum = sum
topLeftIdx = i
}
}
// Rotate array so top-left is first
const ordered = []
for (let i = 0; i < 4; i++) {
ordered.push(sorted[(topLeftIdx + i) % 4])
}
return ordered
},
[]
)
// Check if aspect ratio is document-like
const isDocumentAspectRatio = useCallback((ratio: number): boolean => {
return EXPECTED_ASPECT_RATIOS.some(
(expected) => Math.abs(ratio - expected) < ASPECT_RATIO_TOLERANCE
)
}, [])
// Generate a stable ID for a quad based on its center position
const getQuadCenterId = useCallback(
(corners: Array<{ x: number; y: number }>, frameWidth: number, frameHeight: number): string => {
const cx = corners.reduce((s, c) => s + c.x, 0) / 4
const cy = corners.reduce((s, c) => s + c.y, 0) / 4
// Quantize to grid cells (10x10 grid)
const gridX = Math.floor((cx / frameWidth) * 10)
const gridY = Math.floor((cy / frameHeight) * 10)
return `${gridX},${gridY}`
},
[]
)
// Check if two quads are similar (same document)
const quadsMatch = useCallback(
(
q1: Array<{ x: number; y: number }>,
q2: Array<{ x: number; y: number }>,
frameDiagonal: number
): boolean => {
const threshold = frameDiagonal * QUAD_MATCH_THRESHOLD
let totalDist = 0
for (let i = 0; i < 4; i++) {
totalDist += distance(q1[i], q2[i])
}
return totalDist / 4 < threshold
},
[distance]
)
// Calculate corner stability (how much corners move between frames)
const calculateCornerStability = useCallback(
(history: Array<Array<{ x: number; y: number }>>): number => {
if (history.length < 2) return 0
let totalVariance = 0
for (let corner = 0; corner < 4; corner++) {
const xs = history.map((h) => h[corner].x)
const ys = history.map((h) => h[corner].y)
const meanX = xs.reduce((a, b) => a + b, 0) / xs.length
const meanY = ys.reduce((a, b) => a + b, 0) / ys.length
const varX = xs.reduce((a, b) => a + (b - meanX) ** 2, 0) / xs.length
const varY = ys.reduce((a, b) => a + (b - meanY) ** 2, 0) / ys.length
totalVariance += Math.sqrt(varX + varY)
}
// Convert variance to stability score (lower variance = higher stability)
// Normalize: variance of 0 = stability 1, variance of 50+ = stability 0
const avgVariance = totalVariance / 4
return Math.max(0, 1 - avgVariance / 50)
},
[]
)
// Find all quadrilaterals in the frame using OpenCV
const findAllQuads = useCallback(
(frameCanvas: HTMLCanvasElement): DetectedQuad[] => {
const cv = cvRef.current
if (!cv) return []
const quads: DetectedQuad[] = []
const frameArea = frameCanvas.width * frameCanvas.height
const frameDiagonal = Math.sqrt(frameCanvas.width ** 2 + frameCanvas.height ** 2)
// OpenCV processing
let src: CVMat | null = null
let gray: CVMat | null = null
let blurred: CVMat | null = null
let edges: CVMat | null = null
let contours: CVMatVector | null = null
let hierarchy: CVMat | null = null
try {
src = cv.imread(frameCanvas)
gray = new cv.Mat()
blurred = new cv.Mat()
edges = new cv.Mat()
// Convert to grayscale
cv.cvtColor(src, gray, cv.COLOR_RGBA2GRAY)
// Blur to reduce noise
cv.GaussianBlur(gray, blurred, new cv.Size(5, 5), 0, 0, cv.BORDER_DEFAULT)
// Edge detection
cv.Canny(blurred, edges, 50, 150)
// Dilate edges to connect gaps
const kernel = new cv.Mat()
cv.dilate(edges, edges, kernel, { x: -1, y: -1 } as CVPoint, 1)
kernel.delete()
// Find contours
contours = new cv.MatVector()
hierarchy = new cv.Mat()
cv.findContours(edges, contours, hierarchy, cv.RETR_LIST, cv.CHAIN_APPROX_SIMPLE)
// Process each contour
for (let i = 0; i < contours.size(); i++) {
const contour = contours.get(i)
const area = cv.contourArea(contour)
const areaRatio = area / frameArea
// Skip if too small or too large
if (areaRatio < MIN_AREA_RATIO || areaRatio > MAX_AREA_RATIO) {
continue
}
// Approximate to polygon
const approx = new cv.Mat()
const perimeter = cv.arcLength(contour, true)
cv.approxPolyDP(contour, approx, 0.02 * perimeter, true)
// Check if it's a quadrilateral
if (approx.rows === 4) {
// Extract corners
const corners: Array<{ x: number; y: number }> = []
for (let j = 0; j < 4; j++) {
corners.push({
x: approx.data32S[j * 2],
y: approx.data32S[j * 2 + 1],
})
}
// Order corners consistently
const orderedCorners = orderCorners(corners)
// Calculate aspect ratio
const width = distance(orderedCorners[0], orderedCorners[1])
const height = distance(orderedCorners[1], orderedCorners[2])
const aspectRatio = Math.max(width, height) / Math.min(width, height)
// Check if aspect ratio is document-like
if (isDocumentAspectRatio(aspectRatio)) {
quads.push({
corners: orderedCorners,
area,
aspectRatio,
centerId: getQuadCenterId(orderedCorners, frameCanvas.width, frameCanvas.height),
})
}
}
approx.delete()
}
} finally {
// Clean up OpenCV memory
src?.delete()
gray?.delete()
blurred?.delete()
edges?.delete()
contours?.delete()
hierarchy?.delete()
}
// Sort by area (largest first)
quads.sort((a, b) => b.area - a.area)
return quads
},
[distance, orderCorners, isDocumentAspectRatio, getQuadCenterId]
)
// Update tracked quads with new detections
const updateTrackedQuads = useCallback(
(
detectedQuads: DetectedQuad[],
frameWidth: number,
frameHeight: number
): TrackedQuad | null => {
const currentFrame = frameCountRef.current++
const trackedQuads = trackedQuadsRef.current
const frameDiagonal = Math.sqrt(frameWidth ** 2 + frameHeight ** 2)
// Mark all tracked quads as not seen this frame
const seenIds = new Set<string>()
// Match detected quads to tracked quads
for (const detected of detectedQuads) {
let matched = false
for (const [id, tracked] of trackedQuads) {
if (!seenIds.has(id) && quadsMatch(detected.corners, tracked.corners, frameDiagonal)) {
// Update existing tracked quad
tracked.corners = detected.corners
tracked.area = detected.area
tracked.aspectRatio = detected.aspectRatio
tracked.frameCount++
tracked.lastSeenFrame = currentFrame
tracked.cornerHistory.push(detected.corners)
if (tracked.cornerHistory.length > HISTORY_LENGTH) {
tracked.cornerHistory.shift()
}
tracked.stabilityScore = calculateCornerStability(tracked.cornerHistory)
seenIds.add(id)
matched = true
break
}
}
if (!matched) {
// New quad - start tracking
const newId = `quad_${currentFrame}_${Math.random().toString(36).slice(2, 8)}`
trackedQuads.set(newId, {
id: newId,
corners: detected.corners,
area: detected.area,
aspectRatio: detected.aspectRatio,
frameCount: 1,
lastSeenFrame: currentFrame,
stabilityScore: 0,
cornerHistory: [detected.corners],
})
seenIds.add(newId)
}
}
// Remove quads not seen for a while
for (const [id, tracked] of trackedQuads) {
if (currentFrame - tracked.lastSeenFrame > 3) {
trackedQuads.delete(id)
}
}
// Find best quad (highest score = frameCount * stability * area)
let bestQuad: TrackedQuad | null = null
let bestScore = 0
for (const tracked of trackedQuads.values()) {
// Only consider quads seen recently
if (currentFrame - tracked.lastSeenFrame > 2) continue
// Score: prioritize stability and longevity, then area
const score = tracked.frameCount * (0.5 + tracked.stabilityScore) * Math.sqrt(tracked.area)
if (score > bestScore) {
bestScore = score
bestQuad = tracked
}
}
bestQuadRef.current = bestQuad
return bestQuad
},
[quadsMatch, calculateCornerStability]
)
// Draw quad on overlay canvas
const drawQuad = useCallback(
(
@ -725,8 +368,9 @@ export function useDocumentDetection(): UseDocumentDetectionReturn {
const highlightDocument = useCallback(
(video: HTMLVideoElement, overlayCanvas: HTMLCanvasElement): boolean => {
const cv = cvRef.current
if (!cv) return false
const detector = detectorRef.current
const tracker = trackerRef.current
if (!detector || !tracker) return false
const startTime = performance.now()
@ -755,29 +399,32 @@ export function useDocumentDetection(): UseDocumentDetectionReturn {
// Clear overlay
overlayCtx.clearRect(0, 0, overlayCanvas.width, overlayCanvas.height)
// Find all quads in this frame
const detectedQuads = findAllQuads(frameCanvas)
// Use modular detector
const detectedQuads = detector.detect(frameCanvas)
// Update tracking and get best quad
const bestQuad = updateTrackedQuads(detectedQuads, frameCanvas.width, frameCanvas.height)
// Use modular tracker
const bestQuad = tracker.update(detectedQuads, {
width: frameCanvas.width,
height: frameCanvas.height,
})
const stats = tracker.getStats()
const detectionTime = performance.now() - startTime
// Draw all detected quads (faded) for debugging
for (const quad of detectedQuads) {
if (bestQuad && quad.centerId === bestQuad.id) continue
drawQuad(overlayCtx, quad.corners, 'rgba(100, 100, 100, 0.3)', 2)
}
// Draw best quad with color based on stability
if (bestQuad) {
const isStable = bestQuad.frameCount >= MIN_FRAMES_FOR_STABLE
const isLocked = bestQuad.frameCount >= LOCKED_FRAME_COUNT
// Update bestQuadRef for extractDocument
bestQuadRef.current = bestQuad
let color: string
let lineWidth: number
if (isLocked && bestQuad.stabilityScore > 0.5) {
if (bestQuad.isLocked) {
color = 'rgba(0, 255, 100, 0.95)'
lineWidth = 6
// Save stable frame
@ -788,7 +435,7 @@ export function useDocumentDetection(): UseDocumentDetectionReturn {
lastStableFrameRef.current.height = frameCanvas.height
const stableCtx = lastStableFrameRef.current.getContext('2d')
stableCtx?.drawImage(frameCanvas, 0, 0)
} else if (isStable) {
} else if (bestQuad.isStable) {
color = 'rgba(100, 255, 100, 0.85)'
lineWidth = 5
} else {
@ -797,6 +444,8 @@ export function useDocumentDetection(): UseDocumentDetectionReturn {
}
drawQuad(overlayCtx, bestQuad.corners, color, lineWidth)
} else {
bestQuadRef.current = null
}
// Update debug info
@ -804,7 +453,7 @@ export function useDocumentDetection(): UseDocumentDetectionReturn {
...prev,
lastDetectionMs: Math.round(detectionTime),
quadsDetected: detectedQuads.length,
trackedQuads: trackedQuadsRef.current.size,
trackedQuads: stats.trackedCount,
bestQuadStability: bestQuad?.stabilityScore ?? 0,
bestQuadFrameCount: bestQuad?.frameCount ?? 0,
lastDetectionError: null,
@ -819,7 +468,7 @@ export function useDocumentDetection(): UseDocumentDetectionReturn {
return false
}
},
[captureVideoFrame, findAllQuads, updateTrackedQuads, drawQuad]
[captureVideoFrame, drawQuad]
)
/**
@ -1085,11 +734,24 @@ export function useDocumentDetection(): UseDocumentDetectionReturn {
[captureVideoFrame, distance, analyzeOrientation, rotateCanvas]
)
// Compute derived state
// Reset tracking state (call when returning from adjustment mode)
const resetTracking = useCallback(() => {
trackerRef.current?.reset()
bestQuadRef.current = null
lastStableFrameRef.current = null
setDebugInfo((prev) => ({
...prev,
quadsDetected: 0,
trackedQuads: 0,
bestQuadStability: 0,
bestQuadFrameCount: 0,
}))
}, [])
// Compute derived state (use isStable/isLocked from tracked quad)
const bestQuad = bestQuadRef.current
const isStable = bestQuad ? bestQuad.frameCount >= MIN_FRAMES_FOR_STABLE : false
const isLocked =
bestQuad && bestQuad.frameCount >= LOCKED_FRAME_COUNT && bestQuad.stabilityScore > 0.5
const isStable = bestQuad?.isStable ?? false
const isLocked = bestQuad?.isLocked ?? false
// Get current best quad corners
const getBestQuadCorners = useCallback((): Array<{
@ -1152,59 +814,57 @@ export function useDocumentDetection(): UseDocumentDetectionReturn {
* Detect quads in a static image (for file uploads and gallery edits)
* Returns detected corners or fallback corners (full image)
*/
const detectQuadsInImage = useCallback(
(canvas: HTMLCanvasElement): DetectQuadsInImageResult => {
// Fallback corners (full image)
const fallbackCorners = [
{ x: 0, y: 0 },
{ x: canvas.width, y: 0 },
{ x: canvas.width, y: canvas.height },
{ x: 0, y: canvas.height },
]
const detectQuadsInImage = useCallback((canvas: HTMLCanvasElement): DetectQuadsInImageResult => {
// Fallback corners (full image)
const fallbackCorners = [
{ x: 0, y: 0 },
{ x: canvas.width, y: 0 },
{ x: canvas.width, y: canvas.height },
{ x: 0, y: canvas.height },
]
if (!cvRef.current) {
const detector = detectorRef.current
if (!detector) {
return {
detected: false,
corners: fallbackCorners,
sourceCanvas: canvas,
}
}
try {
// Use modular detector
const detectedQuads = detector.detect(canvas)
if (detectedQuads.length > 0) {
// Return the best quad (largest area, already sorted)
return {
detected: false,
corners: fallbackCorners,
detected: true,
corners: detectedQuads[0].corners,
sourceCanvas: canvas,
}
}
try {
// Run quad detection on the canvas
const detectedQuads = findAllQuads(canvas)
if (detectedQuads.length > 0) {
// Return the best quad (largest area, already sorted)
return {
detected: true,
corners: detectedQuads[0].corners,
sourceCanvas: canvas,
}
}
// No quads detected - return fallback
return {
detected: false,
corners: fallbackCorners,
sourceCanvas: canvas,
}
} catch (err) {
console.warn('Quad detection failed:', err)
return {
detected: false,
corners: fallbackCorners,
sourceCanvas: canvas,
}
// No quads detected - return fallback
return {
detected: false,
corners: fallbackCorners,
sourceCanvas: canvas,
}
},
[findAllQuads]
)
} catch (err) {
console.warn('Quad detection failed:', err)
return {
detected: false,
corners: fallbackCorners,
sourceCanvas: canvas,
}
}
}, [])
return {
isLoading,
error,
isReady: !isLoading && !error && cvRef.current !== null,
isReady: !isLoading && !error && detectorRef.current !== null,
ensureOpenCVLoaded,
isStable,
isLocked: !!isLocked,
@ -1216,6 +876,9 @@ export function useDocumentDetection(): UseDocumentDetectionReturn {
extractDocument,
detectQuadsInImage,
loadImageToCanvas,
resetTracking,
updateDetectorConfig,
detectorConfig,
}
}

View File

@ -9,11 +9,7 @@
import { useCallback, useRef, useState } from 'react'
import type { CV } from '@/lib/vision/opencv/types'
import {
loadOpenCV as loadOpenCVCore,
getOpenCV,
isOpenCVReady,
} from '@/lib/vision/opencv/loader'
import { loadOpenCV as loadOpenCVCore, getOpenCV, isOpenCVReady } from '@/lib/vision/opencv/loader'
export interface UseOpenCVReturn {
/** OpenCV instance (null if not loaded) */

View File

@ -24,7 +24,12 @@ export type {
} from '@/lib/vision/quadDetection'
// Re-export utility functions
export { loadImageToCanvas, captureVideoFrame, orderCorners, distance } from '@/lib/vision/quadDetection'
export {
loadImageToCanvas,
captureVideoFrame,
orderCorners,
distance,
} from '@/lib/vision/quadDetection'
/**
* React hook for quad detection in static images.
@ -89,13 +94,10 @@ export function useQuadDetection(options?: QuadDetectionOptions) {
* Detect quads in a canvas.
* Returns null if detector is not loaded.
*/
const detect = useCallback(
(canvas: HTMLCanvasElement): QuadDetectionResult | null => {
if (!detectorRef.current) return null
return detectorRef.current.detect(canvas, optionsRef.current)
},
[]
)
const detect = useCallback((canvas: HTMLCanvasElement): QuadDetectionResult | null => {
if (!detectorRef.current) return null
return detectorRef.current.detect(canvas, optionsRef.current)
}, [])
/**
* Detect quads in an image file.
@ -155,10 +157,13 @@ export function useQuadDetection(options?: QuadDetectionOptions) {
/**
* Extract a quad region using perspective transform.
*/
const extract = useCallback((canvas: HTMLCanvasElement, corners: Corner[]): HTMLCanvasElement | null => {
if (!detectorRef.current) return null
return detectorRef.current.extract(canvas, corners)
}, [])
const extract = useCallback(
(canvas: HTMLCanvasElement, corners: Corner[]): HTMLCanvasElement | null => {
if (!detectorRef.current) return null
return detectorRef.current.extract(canvas, corners)
},
[]
)
/**
* Analyze document orientation.

View File

@ -0,0 +1,241 @@
# Document Detection Research
Research notes on improving quad/document detection, particularly for handling finger occlusion and complex backgrounds.
**Date**: January 2026
**Context**: Current OpenCV-based quad detection struggles with finger occlusion and busy backgrounds.
---
## The Core Problem
Standard Canny edge detection fails for document scanning because:
> "The sections of text inside the document are strongly amplified, whereas the document edges—what we're interested in—show up very weakly."
> — Dropbox Engineering
Traditional CV approaches (Canny + Hough) can only work with **visible edges**. When fingers occlude document corners, the edge pixels simply aren't there.
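For reference, a minimal sketch of that visible-edges pipeline with OpenCV.js, using the `CV` interface from this repo's type definitions (thresholds are illustrative):

```typescript
import type { CV } from '@/lib/vision/opencv/types'

// Canny + probabilistic Hough: only edges that are actually visible produce
// line segments, which is exactly why a finger over a corner breaks this.
function detectVisibleEdgeLines(
  cv: CV,
  canvas: HTMLCanvasElement
): Array<[number, number, number, number]> {
  const src = cv.imread(canvas)
  const gray = new cv.Mat()
  const edges = new cv.Mat()
  const lines = new cv.Mat()
  try {
    cv.cvtColor(src, gray, cv.COLOR_RGBA2GRAY)
    cv.Canny(gray, edges, 50, 150) // illustrative thresholds
    // rho = 1px, theta = 1 degree, 80 votes, min length 50px, max gap 10px
    cv.HoughLinesP(edges, lines, 1, Math.PI / 180, 80, 50, 10)
    const segments: Array<[number, number, number, number]> = []
    for (let i = 0; i < lines.rows; i++) {
      segments.push([
        lines.data32S[i * 4],
        lines.data32S[i * 4 + 1],
        lines.data32S[i * 4 + 2],
        lines.data32S[i * 4 + 3],
      ])
    }
    return segments
  } finally {
    src.delete()
    gray.delete()
    edges.delete()
    lines.delete()
  }
}
```

When a finger covers a corner, the Canny map simply has no edge pixels there, so no Hough segment can recover that side.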
---
## Industry Approaches
### Dropbox (2016)
**Source**: [Fast and Accurate Document Detection for Scanning](https://dropbox.tech/machine-learning/fast-and-accurate-document-detection-for-scanning)
1. **Custom ML-based edge detector** - trained to suppress text edges while keeping document boundaries (details proprietary)
2. **Hough transform** for line detection from the cleaned edge map
3. **Quadrilateral scoring** - enumerate all possible quads from line intersections, score each by summing edge probabilities along perimeter
4. **Result**: 8-10 FPS, 60% fewer manual corrections vs Apple's SDK
Follow-up: [Improving the Responsiveness of the Document Detector](https://dropbox.tech/machine-learning/improving-the-responsiveness-of-the-document-detector)
- Motion-based quad tracking between frames
- Hybrid: full detection every ~100ms + fast tracking on intermediate frames (cadence sketched below)
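A minimal sketch of that cadence (`detectFull` and `trackFast` are hypothetical stand-ins for Dropbox's detector and tracker):

```typescript
type Point = { x: number; y: number }

// hypothetical stand-ins for a full detector and a cheap frame-to-frame tracker
declare function detectFull(frame: HTMLCanvasElement): Point[] | null
declare function trackFast(frame: HTMLCanvasElement, prev: Point[]): Point[] | null

const FULL_DETECTION_INTERVAL_MS = 100

let lastFullDetection = 0
let quad: Point[] | null = null

function onFrame(frame: HTMLCanvasElement): Point[] | null {
  const now = performance.now()
  if (quad === null || now - lastFullDetection > FULL_DETECTION_INTERVAL_MS) {
    quad = detectFull(frame) // expensive, runs every ~100ms
    lastFullDetection = now
  } else {
    quad = trackFast(frame, quad) // cheap motion tracking between detections
  }
  return quad
}
```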
### Genius Scan (2024)
**Source**: [Document Detection - How Deep Learning Has Changed The Game](https://blog.thegrizzlylabs.com/2024/10/document-detection.html)
**Key insight**: Combining DL + traditional CV raised accuracy from 51% → 75% → 85%:
- DL provides **robustness** (handles occlusion, complex backgrounds)
- Traditional CV provides **precision** (sub-pixel corner refinement)
Architecture:
- MobileNet V2 backbone
- Input resolution: 96×96 pixels
- Training dataset: 1M+ images
- Pre-training: ImageNet, fine-tuned on document data
- Performance: 25+ FPS on mobile
### Scanner Pro (Readdle)
**Source**: [Inside ScannerPro: the Tech behind perfect scans](https://readdle.com/blog/scanner-pro-border-detection)
Evolution:
1. Traditional CV (Canny + Hough) - baseline
2. Semantic segmentation - too slow (5 FPS on iPhone X)
3. **Keypoint detection** - direct corner prediction, 30+ FPS
Key techniques:
- MobileNet-based keypoint detector
- Kalman filter + IMU data for temporal smoothing (see the scalar sketch below)
- Two-stage: lightweight detector for streaming, heavier model on capture
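A simplified sketch of the smoothing piece: a constant-position Kalman filter per corner coordinate (the IMU fusion Scanner Pro describes is omitted):

```typescript
// One-dimensional constant-position Kalman filter; run one per corner coordinate.
// q = process noise (how much the corner may drift per frame),
// r = measurement noise (how jittery raw detections are) - both illustrative.
class ScalarKalman {
  private estimate = 0
  private variance = 1
  private initialized = false

  constructor(
    private readonly q = 0.05,
    private readonly r = 4
  ) {}

  update(measurement: number): number {
    if (!this.initialized) {
      this.estimate = measurement
      this.initialized = true
      return this.estimate
    }
    this.variance += this.q // predict: uncertainty grows
    const gain = this.variance / (this.variance + this.r)
    this.estimate += gain * (measurement - this.estimate) // correct toward measurement
    this.variance *= 1 - gain
    return this.estimate
  }
}

// usage: 8 filters, one per (x, y) of each of the 4 corners
const filters = Array.from({ length: 8 }, () => new ScalarKalman())
```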
### Academic Approaches
**Multi-document detection via corner localization**:
- Joint Corner Detector (JCD) with attention mechanism
- Coarse-to-fine: rough prediction → corner-specific refinement
- Datasets: ICDAR 2015 SmartDoc, SEECS-NUSF, MIDV-500
**Semantic segmentation** (LearnOpenCV tutorial):
- DeepLabv3 with MobileNetV3-Large backbone
- Binary segmentation (document vs background)
- Trained on synthetic data with augmentation
- Extract corners from mask via contour detection (sketched below)
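Corner extraction from a binary mask is plain OpenCV work. A sketch against this repo's `CV` interface, assuming `mask` is a single-channel binary Mat:

```typescript
import type { CV, CVMat } from '@/lib/vision/opencv/types'

// Take the largest 4-point polygon from a binary document mask.
function cornersFromMask(cv: CV, mask: CVMat): Array<{ x: number; y: number }> {
  const contours = new cv.MatVector()
  const hierarchy = new cv.Mat()
  let best: Array<{ x: number; y: number }> = []
  let bestArea = 0
  try {
    cv.findContours(mask, contours, hierarchy, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)
    for (let i = 0; i < contours.size(); i++) {
      const contour = contours.get(i)
      const area = cv.contourArea(contour)
      if (area <= bestArea) continue
      const approx = new cv.Mat()
      cv.approxPolyDP(contour, approx, 0.02 * cv.arcLength(contour, true), true)
      if (approx.rows === 4) {
        bestArea = area
        best = []
        for (let j = 0; j < 4; j++) {
          best.push({ x: approx.data32S[j * 2], y: approx.data32S[j * 2 + 1] })
        }
      }
      approx.delete()
    }
  } finally {
    contours.delete()
    hierarchy.delete()
  }
  return best
}
```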
---
## Modern Architecture Pattern
The recommended approach for robust document detection:
```
Input Image (downscaled to 96-256px)
          │
          ▼
┌───────────────────┐
│  MobileNetV2/V3   │ ← Pretrained on ImageNet
│ Feature Extractor │
└─────────┬─────────┘
          │
          ▼
┌───────────────────┐
│  Regression Head  │ ← 8 outputs (x,y for 4 corners)
│ (or Heatmap Head) │
└─────────┬─────────┘
          │
          ▼
  Corner Coordinates
          │
          ▼
┌───────────────────┐
│   Optional: CV    │ ← Sub-pixel refinement
│    Refinement     │
└───────────────────┘
```
**Why this works for occlusion**: The network learns document shape priors and can predict where corners *should* be even when they're not visible.
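Decoding the head's output is trivial, which is part of the appeal. A sketch, assuming the 8 outputs are normalized [0,1] corner coordinates in a fixed order:

```typescript
type Point = { x: number; y: number }

// Map the 8 normalized regression outputs back to pixel space of the frame.
function decodeCorners(outputs: Float32Array, frameWidth: number, frameHeight: number): Point[] {
  const corners: Point[] = []
  for (let i = 0; i < 4; i++) {
    corners.push({
      x: outputs[i * 2] * frameWidth,
      y: outputs[i * 2 + 1] * frameHeight,
    })
  }
  return corners
}
```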
---
## Hugging Face Models
### Document-Specific
**[ordaktaktak/Document-Scanner](https://huggingface.co/ordaktaktak/Document-Scanner)**
- Architecture: U-Net semantic segmentation
- Input: Grayscale 256×256
- Output: Binary mask → extract corners via contour detection
- Framework: PyTorch (.pth weights)
- Status: Would need conversion to ONNX/TF.js
### Background Removal (Could Adapt)
**[briaai/RMBG-2.0](https://huggingface.co/briaai/RMBG-2.0)**
- Architecture: BiRefNet (0.2B params) - too large for real-time
- Input: 1024×1024 RGB
- Output: Alpha matte
- Transformers.js compatible
- License: CC BY-NC 4.0 (non-commercial)
**[briaai/RMBG-1.4](https://huggingface.co/briaai/RMBG-1.4)**
- Smaller version (44.1M params)
- Same approach, might be more practical
### General Segmentation (Transformers.js Ready)
| Model | Size | Use Case |
|-------|------|----------|
| `Xenova/deeplabv3-mobilevit-xx-small` | Tiny | Fast, low accuracy |
| `Xenova/deeplabv3-mobilevit-small` | Small | Balanced |
| `Xenova/deeplabv3-mobilevit-x-small` | X-Small | Middle ground |
| `nnny/onnx-mobile-sam` | ~5MB | General segmentation with prompts |
### SAM-based Approach
Could use Segment Anything Model with point prompts:
1. User taps roughly in document area
2. SAM segments the document
3. Extract corners from segmentation mask
Models: `nnny/onnx-mobile-sam`, SlimSAM variants
---
## Implementation Options
### Option 1: Convert Document-Scanner to ONNX
```python
# Download the PyTorch weights, convert with torch.onnx.export(),
# then run the result in the browser with ONNX Runtime Web.
import torch

model = load_document_scanner_model()  # hypothetical loader for the .pth weights
dummy_input = torch.randn(1, 1, 256, 256)  # grayscale 256x256, NCHW
torch.onnx.export(model, dummy_input, "document_scanner.onnx")
```
Pros: Purpose-built for documents
Cons: Still outputs mask, need CV for corners
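On the browser side, inference with ONNX Runtime Web would look roughly like this (a sketch; the model path and tensor names depend on how the export above labels them):

```typescript
import * as ort from 'onnxruntime-web'

// Run the converted U-Net on a 256x256 grayscale tensor and return the mask.
async function runDocumentScanner(grayscale: Float32Array): Promise<ort.Tensor> {
  const session = await ort.InferenceSession.create('/models/document_scanner.onnx')
  const input = new ort.Tensor('float32', grayscale, [1, 1, 256, 256])
  // assumption: single input and output; names come from the ONNX export
  const feeds = { [session.inputNames[0]]: input }
  const results = await session.run(feeds)
  return results[session.outputNames[0]] as ort.Tensor
}
```

The returned mask would then go through contour-based corner extraction, as in the segmentation sketch above.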
### Option 2: SAM with Point Prompts
```typescript
import { pipeline } from '@huggingface/transformers';

// NOTE: the prompt-passing API below is an assumption - how point prompts are
// supplied depends on the pipeline/model wrapper, so verify against the
// Transformers.js docs before relying on this shape.
const segmenter = await pipeline('image-segmentation', 'nnny/onnx-mobile-sam');
const result = await segmenter(image, { points: [[centerX, centerY]] });
// Extract corners from the returned mask (see the contour sketch above)
```
Pros: No training needed, handles complex shapes
Cons: Requires user interaction (point prompt)
### Option 3: Train Custom Corner Detector
Training a lightweight model specifically for corner prediction:
1. **Architecture**: MobileNetV2 → 8-output regression (4 corners × 2 coords)
2. **Training data**:
- SmartDoc dataset (ICDAR)
- DocVQA documents on backgrounds
- Synthetic: random quads with augmentation
- **Critical**: Include finger occlusion augmentation
3. **Output**: Normalized corner coordinates [0,1]
4. **Export**: TensorFlow.js or ONNX
This is what Dropbox, Genius Scan, and Scanner Pro actually do.
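A sketch of what the regression head might look like in TensorFlow.js, assuming a pretrained single-output convolutional backbone is already loaded as a `tf.LayersModel` (layer sizes are illustrative):

```typescript
import * as tf from '@tensorflow/tfjs'

// 8-output corner regression head on top of a frozen feature extractor.
function buildCornerModel(backbone: tf.LayersModel): tf.LayersModel {
  backbone.trainable = false // fine-tune the head first, unfreeze later
  let x = tf.layers.globalAveragePooling2d({}).apply(backbone.output) as tf.SymbolicTensor
  x = tf.layers.dense({ units: 128, activation: 'relu' }).apply(x) as tf.SymbolicTensor
  // sigmoid keeps the 4 (x, y) pairs in [0, 1], matching normalized labels
  const corners = tf.layers
    .dense({ units: 8, activation: 'sigmoid' })
    .apply(x) as tf.SymbolicTensor
  const model = tf.model({ inputs: backbone.inputs, outputs: corners })
  model.compile({ optimizer: tf.train.adam(1e-4), loss: 'meanSquaredError' })
  return model
}
```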
### Option 4: Hybrid (Recommended for Production)
1. **Primary**: Lightweight CNN corner predictor (handles occlusion)
2. **Refinement**: Traditional CV on predicted region (sub-pixel accuracy)
3. **Tracking**: Kalman filter for temporal stability (per-frame flow sketched below)
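Put together, the per-frame flow is small; the heavy lifting lives in the stand-in helpers (`predictCorners`, `refineCornersCV`, and `smoother` are all hypothetical):

```typescript
type Point = { x: number; y: number }

declare function predictCorners(frame: HTMLCanvasElement): Promise<Point[]> // CNN: robust to occlusion
declare function refineCornersCV(frame: HTMLCanvasElement, rough: Point[]): Point[] // CV: sub-pixel precision
declare const smoother: { update(corners: Point[]): Point[] } // Kalman: temporal stability

async function detectDocument(frame: HTMLCanvasElement): Promise<Point[]> {
  const rough = await predictCorners(frame)
  const refined = refineCornersCV(frame, rough)
  return smoother.update(refined)
}
```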
---
## Datasets
| Dataset | Size | Notes |
|---------|------|-------|
| SmartDoc (ICDAR 2015) | 4,260 images | Competition dataset, labeled corners |
| MIDV-500 | 500 video clips | ID documents, challenging conditions |
| DocVQA | 50K+ images | Document images (need corner labels) |
| Synthetic | Unlimited | Generate documents on backgrounds (toy generator sketched below) |
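A toy version of the synthetic generator, mainly to show the label format (real pipelines warp actual document textures and add shadows and finger occluders rather than a flat quad):

```typescript
type Point = { x: number; y: number }

// Draw a random light quad over a background canvas and return the image
// together with normalized corner labels (8 values, the regression target).
function makeSyntheticSample(bg: HTMLCanvasElement): { canvas: HTMLCanvasElement; label: number[] } {
  const canvas = document.createElement('canvas')
  canvas.width = bg.width
  canvas.height = bg.height
  const ctx = canvas.getContext('2d')!
  ctx.drawImage(bg, 0, 0)
  const margin = () => 0.1 + Math.random() * 0.15 // random inset from each side
  const corners: Point[] = [
    { x: margin(), y: margin() },
    { x: 1 - margin(), y: margin() },
    { x: 1 - margin(), y: 1 - margin() },
    { x: margin(), y: 1 - margin() },
  ]
  ctx.fillStyle = '#f5f2ea' // paper-ish tone
  ctx.beginPath()
  corners.forEach((c, i) =>
    i === 0
      ? ctx.moveTo(c.x * canvas.width, c.y * canvas.height)
      : ctx.lineTo(c.x * canvas.width, c.y * canvas.height)
  )
  ctx.closePath()
  ctx.fill()
  return { canvas, label: corners.flatMap((c) => [c.x, c.y]) }
}
```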
---
## Key Takeaways
1. **Traditional CV (our current approach) will always struggle with occlusion** - it can only see visible edges
2. **The industry solution is learned corner prediction** - networks trained on documents learn shape priors
3. **Hybrid approaches work best** - DL for robustness, CV for precision
4. **No ready-to-use model exists** on Hugging Face that:
- Is specifically trained for document corners
- Handles finger occlusion
- Is already in TF.js/ONNX format
5. **Realistic path forward**:
- Short term: Try SAM with prompts, or convert Document-Scanner to ONNX
- Long term: Train custom MobileNet-based corner detector
---
## References
- [Dropbox: Fast and Accurate Document Detection](https://dropbox.tech/machine-learning/fast-and-accurate-document-detection-for-scanning)
- [Dropbox: Improving Responsiveness](https://dropbox.tech/machine-learning/improving-the-responsiveness-of-the-document-detector)
- [Genius Scan: Document Detection with Deep Learning](https://blog.thegrizzlylabs.com/2024/10/document-detection.html)
- [Scanner Pro: Tech Behind Perfect Scans](https://readdle.com/blog/scanner-pro-border-detection)
- [LearnOpenCV: Document Segmentation with DeepLabv3](https://learnopencv.com/deep-learning-based-document-segmentation-using-semantic-segmentation-deeplabv3-on-custom-dataset/)
- [Transformers.js Documentation](https://huggingface.co/docs/transformers.js/en/index)
- [U-Net Paper (arxiv 1505.04597)](https://arxiv.org/abs/1505.04597)

View File

@ -6,7 +6,7 @@
*/
export async function simpleDelay(ms: number): Promise<string> {
console.log('[simpleAsync] simpleDelay called with', ms)
await new Promise(resolve => setTimeout(resolve, ms))
await new Promise((resolve) => setTimeout(resolve, ms))
console.log('[simpleAsync] delay complete')
return 'done'
}

View File

@ -7,7 +7,9 @@
export interface CVMat {
delete: () => void
copyTo: (dst: CVMat) => void
data32S: Int32Array
data32F: Float32Array
data: ArrayBuffer
rows: number
cols: number
@ -46,7 +48,48 @@ export interface CV {
borderType: number
) => void
Canny: (src: CVMat, dst: CVMat, t1: number, t2: number) => void
Sobel: (src: CVMat, dst: CVMat, ddepth: number, dx: number, dy: number, ksize?: number) => void
addWeighted: (
src1: CVMat,
alpha: number,
src2: CVMat,
beta: number,
gamma: number,
dst: CVMat
) => void
convertScaleAbs: (src: CVMat, dst: CVMat, alpha?: number, beta?: number) => void
equalizeHist: (src: CVMat, dst: CVMat) => void
adaptiveThreshold: (
src: CVMat,
dst: CVMat,
maxValue: number,
adaptiveMethod: number,
thresholdType: number,
blockSize: number,
C: number
) => void
threshold: (src: CVMat, dst: CVMat, thresh: number, maxval: number, type: number) => number
bilateralFilter: (
src: CVMat,
dst: CVMat,
d: number,
sigmaColor: number,
sigmaSpace: number,
borderType?: number
) => void
morphologyEx: (
src: CVMat,
dst: CVMat,
op: number,
kernel: CVMat,
anchor?: CVPoint,
iterations?: number
) => void
getStructuringElement: (shape: number, ksize: CVSize, anchor?: CVPoint) => CVMat
erode: (src: CVMat, dst: CVMat, kernel: CVMat, anchor?: CVPoint, iterations?: number) => void
dilate: (src: CVMat, dst: CVMat, kernel: CVMat, anchor: CVPoint, iterations: number) => void
bitwise_or: (src1: CVMat, src2: CVMat, dst: CVMat) => void
bitwise_and: (src1: CVMat, src2: CVMat, dst: CVMat) => void
findContours: (
src: CVMat,
contours: CVMatVector,
@ -57,6 +100,17 @@ export interface CV {
contourArea: (contour: CVMat) => number
arcLength: (contour: CVMat, closed: boolean) => number
approxPolyDP: (contour: CVMat, approx: CVMat, epsilon: number, closed: boolean) => void
convexHull: (src: CVMat, dst: CVMat, clockwise: boolean, returnPoints: boolean) => void
// Hough line detection
HoughLinesP: (
src: CVMat,
lines: CVMat,
rho: number,
theta: number,
threshold: number,
minLineLength?: number,
maxLineGap?: number
) => void
getPerspectiveTransform: (src: CVMat, dst: CVMat) => CVMat
warpPerspective: (
src: CVMat,
@ -72,13 +126,34 @@ export interface CV {
COLOR_RGBA2GRAY: number
BORDER_DEFAULT: number
RETR_LIST: number
RETR_EXTERNAL: number
CHAIN_APPROX_SIMPLE: number
CV_32FC2: number
CV_32SC4: number
CV_8U: number
CV_16S: number
CV_64F: number
INTER_LINEAR: number
BORDER_CONSTANT: number
ROTATE_90_CLOCKWISE: number
ROTATE_180: number
ROTATE_90_COUNTERCLOCKWISE: number
// Threshold types
THRESH_BINARY: number
THRESH_BINARY_INV: number
THRESH_OTSU: number
// Adaptive threshold methods
ADAPTIVE_THRESH_MEAN_C: number
ADAPTIVE_THRESH_GAUSSIAN_C: number
// Morphological operations
MORPH_RECT: number
MORPH_ELLIPSE: number
MORPH_CROSS: number
MORPH_OPEN: number
MORPH_CLOSE: number
MORPH_GRADIENT: number
MORPH_DILATE: number
MORPH_ERODE: number
}
/**

File diff suppressed because it is too large

View File

@ -0,0 +1,266 @@
'use client'
/**
* useQuadDetection Hook
*
* A React hook that combines OpenCV loading, quad detection, and temporal tracking.
* Provides a clean API for detecting quadrilaterals in both camera feeds and static images.
*
* Usage:
* ```tsx
* import { OpenCvProvider } from 'opencv-react'
* import { useQuadDetection } from '@/lib/vision/useQuadDetection'
*
* // Wrap your app/page with OpenCvProvider
* <OpenCvProvider>
* <MyComponent />
* </OpenCvProvider>
*
* // In your component:
* function MyComponent() {
* const {
* isReady,
* detectInImage,
* processFrame,
* trackedQuad,
* stats,
* resetTracking,
* } = useQuadDetection()
*
* // For static images:
* const quads = detectInImage(canvas)
*
* // For camera feeds (call each frame):
* const bestQuad = processFrame(videoFrame)
* }
* ```
*/
import { useCallback, useMemo, useRef } from 'react'
import { useOpenCv } from 'opencv-react'
import {
createQuadDetector,
type DetectedQuad,
type DebugPolygon,
type QuadDetectorConfig,
} from './quadDetector'
import { createQuadTracker, type TrackedQuad, type QuadTrackerConfig } from './quadTracker'
import type { CV } from './opencv/types'
// Re-export types for convenience
export type { DetectedQuad, Point, DebugPolygon } from './quadDetector'
export type { TrackedQuad } from './quadTracker'
export type { QuadDetectorConfig } from './quadDetector'
export type { QuadTrackerConfig } from './quadTracker'
/** Configuration for useQuadDetection */
export interface UseQuadDetectionConfig {
/** Quad detector configuration */
detector?: Partial<QuadDetectorConfig>
/** Quad tracker configuration */
tracker?: Partial<QuadTrackerConfig>
}
/** Stats returned by the hook */
export interface QuadDetectionStats {
/** Number of quads currently being tracked */
trackedCount: number
/** Total frames processed */
frameCount: number
/** Stability score of the best quad (0-1) */
bestStability: number
/** Frame count of the best quad */
bestFrameCount: number
}
/** Result from processing a single frame */
export interface FrameProcessingResult {
/** Best tracked quad, or null if none */
trackedQuad: TrackedQuad | null
/** All quads detected in this frame (before tracking) */
detectedQuads: DetectedQuad[]
/** Current tracking statistics */
stats: QuadDetectionStats
}
/** Return type of useQuadDetection */
export interface UseQuadDetectionReturn {
/** Whether OpenCV is loaded and detector is ready */
isReady: boolean
/** Whether OpenCV is currently loading */
isLoading: boolean
/** Error message if loading failed */
error: string | null
/**
* Detect quads in a static image (one-shot, no tracking)
* @param source - Canvas to detect in
* @returns Array of detected quads, sorted by area (largest first)
*/
detectInImage: (source: HTMLCanvasElement) => DetectedQuad[]
/**
* Detect quads with debug info about all candidate polygons.
* Use this to understand why detection is failing.
* @param source - Canvas to detect in
* @returns Quads and debug info about all candidates
*/
detectWithDebug: (source: HTMLCanvasElement) => {
quads: DetectedQuad[]
debugPolygons: DebugPolygon[]
}
/**
* Process a video frame with tracking
* Call this each frame for camera/video feeds
* @param source - Canvas from video frame
* @param frameSize - Optional explicit frame size (inferred from source if not provided)
* @returns Frame processing result with tracked quad, detected quads, and stats
*/
processFrame: (
source: HTMLCanvasElement,
frameSize?: { width: number; height: number }
) => FrameProcessingResult
/** The current best tracked quad */
trackedQuad: TrackedQuad | null
/** All currently tracked quads */
allTrackedQuads: TrackedQuad[]
/** Current tracking statistics */
stats: QuadDetectionStats
/** Reset all tracking state (call when switching cameras, etc.) */
resetTracking: () => void
}
/**
* React hook for quad detection with optional temporal tracking.
*
* Must be used inside an OpenCvProvider from 'opencv-react'.
*
* @param config - Optional configuration for detector and tracker
*/
export function useQuadDetection(config?: UseQuadDetectionConfig): UseQuadDetectionReturn {
const { loaded: opencvLoaded, cv } = useOpenCv()
// Track the current best quad in a ref for synchronous access
const trackedQuadRef = useRef<TrackedQuad | null>(null)
const allTrackedRef = useRef<TrackedQuad[]>([])
const statsRef = useRef<QuadDetectionStats>({
trackedCount: 0,
frameCount: 0,
bestStability: 0,
bestFrameCount: 0,
})
// Create detector when cv is available
const detector = useMemo(() => {
if (!opencvLoaded || !cv) return null
try {
return createQuadDetector(cv as CV, config?.detector)
} catch (err) {
console.error('[useQuadDetection] Failed to create detector:', err)
return null
}
}, [opencvLoaded, cv, config?.detector])
// Create tracker (doesn't need cv)
const tracker = useMemo(() => createQuadTracker(config?.tracker), [config?.tracker])
// Detect in static image (no tracking)
const detectInImage = useCallback(
(source: HTMLCanvasElement): DetectedQuad[] => {
if (!detector) {
console.warn('[useQuadDetection] detectInImage called before detector ready')
return []
}
return detector.detect(source)
},
[detector]
)
// Detect with debug info (for debugging detection issues)
const detectWithDebug = useCallback(
(source: HTMLCanvasElement): { quads: DetectedQuad[]; debugPolygons: DebugPolygon[] } => {
if (!detector) {
console.warn('[useQuadDetection] detectWithDebug called before detector ready')
return { quads: [], debugPolygons: [] }
}
return detector.detectWithDebug(source)
},
[detector]
)
// Process video frame with tracking
const processFrame = useCallback(
(
source: HTMLCanvasElement,
frameSize?: { width: number; height: number }
): FrameProcessingResult => {
if (!detector) {
return {
trackedQuad: null,
detectedQuads: [],
stats: statsRef.current,
}
}
// Detect quads in frame
const quads = detector.detect(source)
// Determine frame size
const size = frameSize ?? {
width: source.width,
height: source.height,
}
// Update tracker
const bestQuad = tracker.update(quads, size)
const currentStats = tracker.getStats()
const allTracked = tracker.getAllTracked()
// Update refs
trackedQuadRef.current = bestQuad
allTrackedRef.current = allTracked
statsRef.current = currentStats
return {
trackedQuad: bestQuad,
detectedQuads: quads,
stats: currentStats,
}
},
[detector, tracker]
)
// Reset tracking
const resetTracking = useCallback(() => {
tracker.reset()
trackedQuadRef.current = null
allTrackedRef.current = []
statsRef.current = {
trackedCount: 0,
frameCount: 0,
bestStability: 0,
bestFrameCount: 0,
}
}, [tracker])
return {
isReady: !!detector,
isLoading: !opencvLoaded,
error: null, // opencv-react doesn't expose errors directly
detectInImage,
detectWithDebug,
processFrame,
trackedQuad: trackedQuadRef.current,
allTrackedQuads: allTrackedRef.current,
stats: statsRef.current,
resetTracking,
}
}