feat(vision): enhance quad detection with Hough lines and multi-strategy preprocessing
- Add Hough line detection for improved edge finding with finger occlusion
- Implement multi-strategy preprocessing (standard, enhanced, adaptive, multi)
- Add configurable parameters for Canny thresholds, adaptive threshold, morph gradient
- Refactor useDocumentDetection hook with cleaner API
- Add OpenCV type definitions and async loading improvements
- Add loader test pages for debugging OpenCV initialization
- Add quad-test page for interactive detection testing
- Add document detection research notes

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent bc02ba281d
commit 93a25c1e7b
@@ -641,7 +641,16 @@
 "Bash(if [ -f /Users/antialias/projects/soroban-abacus-flashcards/apps/web/data/vision-training/collected/.deleted ])",
 "Bash(then wc -l /Users/antialias/projects/soroban-abacus-flashcards/apps/web/data/vision-training/collected/.deleted)",
 "Bash(else echo \"File does not exist\")",
-"Bash(fi)"
+"Bash(fi)",
+"WebFetch(domain:docs.opencv.org)",
+"mcp__chrome-devtools__new_page",
+"mcp__chrome-devtools__close_page",
+"WebFetch(domain:www.npmjs.com)",
+"Bash(git branch:*)",
+"WebFetch(domain:scanbot.io)",
+"WebFetch(domain:learnopencv.com)",
+"WebFetch(domain:news.ycombinator.com)",
+"Bash(npm run typecheck:*)"
 ],
 "deny": [],
 "ask": []
@@ -339,8 +339,8 @@ export function NavSyncIndicator({ sync }: NavSyncIndicatorProps) {
 </div>
 {sync.status?.local && sync.status?.remote && (
 <div className={css({ color: 'gray.400', fontSize: 'xs' })}>
-Local: {sync.status.local.totalImages?.toLocaleString() || 0} •{' '}
-Remote: {sync.status.remote.totalImages?.toLocaleString() || 0}
+Local: {sync.status.local.totalImages?.toLocaleString() || 0} • Remote:{' '}
+{sync.status.remote.totalImages?.toLocaleString() || 0}
 </div>
 )}
 </div>
@@ -118,7 +118,9 @@ export function useSyncStatus(modelType: ModelType): UseSyncStatusResult {
 const refreshHistory = useCallback(async () => {
 setHistoryLoading(true)
 try {
-const response = await fetch(`/api/vision-training/sync/history?modelType=${modelType}&limit=5`)
+const response = await fetch(
+`/api/vision-training/sync/history?modelType=${modelType}&limit=5`
+)
 if (response.ok) {
 const data = await response.json()
 setHistory(data.history || [])
@@ -77,7 +77,8 @@ export default function LoaderTestAsyncPage() {
 Status:{' '}
 <span
 className={css({
-color: status === 'success' ? 'green.400' : status === 'error' ? 'red.400' : 'gray.400',
+color:
+status === 'success' ? 'green.400' : status === 'error' ? 'red.400' : 'gray.400',
 })}
 >
 {status}
@@ -80,7 +80,8 @@ export default function LoaderTestBarePage() {
 Status:{' '}
 <span
 className={css({
-color: status === 'success' ? 'green.400' : status === 'error' ? 'red.400' : 'gray.400',
+color:
+status === 'success' ? 'green.400' : status === 'error' ? 'red.400' : 'gray.400',
 })}
 >
 {status}
@@ -32,9 +32,7 @@ export default function LoaderTestCheckPage() {
 gap: 4,
 })}
 >
-<h1 className={css({ fontSize: '2xl', fontWeight: 'bold' })}>
-Check window.cv (No Loading)
-</h1>
+<h1 className={css({ fontSize: '2xl', fontWeight: 'bold' })}>Check window.cv (No Loading)</h1>
 <p className={css({ color: 'gray.400', mb: 4 })}>
 Just checks if window.cv exists - no loading.
 </p>
@@ -47,9 +47,7 @@ export default function LoaderTestDirectPage() {
 gap: 4,
 })}
 >
-<h1 className={css({ fontSize: '2xl', fontWeight: 'bold' })}>
-Direct Import Test
-</h1>
+<h1 className={css({ fontSize: '2xl', fontWeight: 'bold' })}>Direct Import Test</h1>
 <p className={css({ color: 'gray.400', mb: 4 })}>
 Imports directly from loader.ts (not barrel index.ts).
 </p>

@@ -80,7 +78,8 @@ export default function LoaderTestDirectPage() {
 Status:{' '}
 <span
 className={css({
-color: status === 'success' ? 'green.400' : status === 'error' ? 'red.400' : 'gray.400',
+color:
+status === 'success' ? 'green.400' : status === 'error' ? 'red.400' : 'gray.400',
 })}
 >
 {status}
@@ -49,9 +49,7 @@ export default function LoaderTestHookCustomPage() {
 gap: 4,
 })}
 >
-<h1 className={css({ fontSize: '2xl', fontWeight: 'bold' })}>
-Custom Hook Test
-</h1>
+<h1 className={css({ fontSize: '2xl', fontWeight: 'bold' })}>Custom Hook Test</h1>
 <p className={css({ color: 'gray.400', mb: 4 })}>
 Uses custom useOpenCV hook from separate file.
 </p>

@@ -82,7 +80,8 @@ export default function LoaderTestHookCustomPage() {
 Status:{' '}
 <span
 className={css({
-color: status === 'success' ? 'green.400' : status === 'error' ? 'red.400' : 'gray.400',
+color:
+status === 'success' ? 'green.400' : status === 'error' ? 'red.400' : 'gray.400',
 })}
 >
 {status}
@@ -82,7 +82,8 @@ export default function LoaderTestHookPage() {
 Status:{' '}
 <span
 className={css({
-color: status === 'success' ? 'green.400' : status === 'error' ? 'red.400' : 'gray.400',
+color:
+status === 'success' ? 'green.400' : status === 'error' ? 'red.400' : 'gray.400',
 })}
 >
 {status}
@@ -132,9 +132,7 @@ export default function LoaderTestInlinePage() {
 gap: 4,
 })}
 >
-<h1 className={css({ fontSize: '2xl', fontWeight: 'bold' })}>
-Inline Loader Test
-</h1>
+<h1 className={css({ fontSize: '2xl', fontWeight: 'bold' })}>Inline Loader Test</h1>
 <p className={css({ color: 'gray.400', mb: 4 })}>
 Loader code is INLINE in this component (not imported from module).
 </p>

@@ -165,7 +163,8 @@ export default function LoaderTestInlinePage() {
 Status:{' '}
 <span
 className={css({
-color: status === 'success' ? 'green.400' : status === 'error' ? 'red.400' : 'gray.400',
+color:
+status === 'success' ? 'green.400' : status === 'error' ? 'red.400' : 'gray.400',
 })}
 >
 {status}
@@ -41,9 +41,7 @@ export default function LoaderTestScriptPage() {
 gap: 4,
 })}
 >
-<h1 className={css({ fontSize: '2xl', fontWeight: 'bold' })}>
-Script Tag Test (No Waiting)
-</h1>
+<h1 className={css({ fontSize: '2xl', fontWeight: 'bold' })}>Script Tag Test (No Waiting)</h1>
 <p className={css({ color: 'gray.400', mb: 4 })}>
 Step 1: Add script tag. Step 2: Check if cv loaded.
 </p>
@@ -79,7 +79,8 @@ export default function LoaderTestSimplePage() {
 Status:{' '}
 <span
 className={css({
-color: status === 'success' ? 'green.400' : status === 'error' ? 'red.400' : 'gray.400',
+color:
+status === 'success' ? 'green.400' : status === 'error' ? 'red.400' : 'gray.400',
 })}
 >
 {status}
@@ -43,9 +43,7 @@ export default function LoaderTestV2Page() {
 gap: 4,
 })}
 >
-<h1 className={css({ fontSize: '2xl', fontWeight: 'bold' })}>
-Loader V2 Test
-</h1>
+<h1 className={css({ fontSize: '2xl', fontWeight: 'bold' })}>Loader V2 Test</h1>
 <p className={css({ color: 'gray.400', mb: 4 })}>
 Uses new loaderV2.ts with proven working pattern.
 </p>

@@ -76,7 +74,8 @@ export default function LoaderTestV2Page() {
 Status:{' '}
 <span
 className={css({
-color: status === 'success' ? 'green.400' : status === 'error' ? 'red.400' : 'gray.400',
+color:
+status === 'success' ? 'green.400' : status === 'error' ? 'red.400' : 'gray.400',
 })}
 >
 {status}
@@ -74,7 +74,8 @@ export default function LoaderTestV3Page() {
 Status:{' '}
 <span
 className={css({
-color: status === 'success' ? 'green.400' : status === 'error' ? 'red.400' : 'gray.400',
+color:
+status === 'success' ? 'green.400' : status === 'error' ? 'red.400' : 'gray.400',
 })}
 >
 {status}
@@ -74,7 +74,8 @@ export default function LoaderTestV4Page() {
 Status:{' '}
 <span
 className={css({
-color: status === 'success' ? 'green.400' : status === 'error' ? 'red.400' : 'gray.400',
+color:
+status === 'success' ? 'green.400' : status === 'error' ? 'red.400' : 'gray.400',
 })}
 >
 {status}
@@ -44,9 +44,7 @@ export default function LoaderTestV5Page() {
 <h1 className={css({ fontSize: '2xl', fontWeight: 'bold' })}>
 Loader V5 Test (No Internal Await)
 </h1>
-<p className={css({ color: 'gray.400', mb: 4 })}>
-Returns Promise, consumer awaits it.
-</p>
+<p className={css({ color: 'gray.400', mb: 4 })}>Returns Promise, consumer awaits it.</p>

 <button
 type="button"

@@ -74,7 +72,8 @@ export default function LoaderTestV5Page() {
 Status:{' '}
 <span
 className={css({
-color: status === 'success' ? 'green.400' : status === 'error' ? 'red.400' : 'gray.400',
+color:
+status === 'success' ? 'green.400' : status === 'error' ? 'red.400' : 'gray.400',
 })}
 >
 {status}
@@ -46,9 +46,7 @@ export default function LoaderTestWaitPage() {
 gap: 4,
 })}
 >
-<h1 className={css({ fontSize: '2xl', fontWeight: 'bold' })}>
-Wait Test (Imported Promise)
-</h1>
+<h1 className={css({ fontSize: '2xl', fontWeight: 'bold' })}>Wait Test (Imported Promise)</h1>
 <p className={css({ color: 'gray.400', mb: 4 })}>
 Adds script tag then waits with imported Promise function.
 </p>

@@ -79,7 +77,8 @@ export default function LoaderTestWaitPage() {
 Status:{' '}
 <span
 className={css({
-color: status === 'success' ? 'green.400' : status === 'error' ? 'red.400' : 'gray.400',
+color:
+status === 'success' ? 'green.400' : status === 'error' ? 'red.400' : 'gray.400',
 })}
 >
 {status}
@@ -51,9 +51,7 @@ export default function LoaderTestWrappedPage() {
 gap: 4,
 })}
 >
-<h1 className={css({ fontSize: '2xl', fontWeight: 'bold' })}>
-Wrapped Import Test
-</h1>
+<h1 className={css({ fontSize: '2xl', fontWeight: 'bold' })}>Wrapped Import Test</h1>
 <p className={css({ color: 'gray.400', mb: 4 })}>
 Imported loadOpenCV wrapped in useCallback.
 </p>

@@ -84,7 +82,8 @@ export default function LoaderTestWrappedPage() {
 Status:{' '}
 <span
 className={css({
-color: status === 'success' ? 'green.400' : status === 'error' ? 'red.400' : 'gray.400',
+color:
+status === 'success' ? 'green.400' : status === 'error' ? 'red.400' : 'gray.400',
 })}
 >
 {status}
@@ -47,9 +47,7 @@ export default function LoaderTestPage() {
 gap: 4,
 })}
 >
-<h1 className={css({ fontSize: '2xl', fontWeight: 'bold' })}>
-Minimal OpenCV Loader Test
-</h1>
+<h1 className={css({ fontSize: '2xl', fontWeight: 'bold' })}>Minimal OpenCV Loader Test</h1>
 <p className={css({ color: 'gray.400', mb: 4 })}>
 This page ONLY imports the standalone loader. No useDocumentDetection.
 </p>

@@ -80,7 +78,8 @@ export default function LoaderTestPage() {
 Status:{' '}
 <span
 className={css({
-color: status === 'success' ? 'green.400' : status === 'error' ? 'red.400' : 'gray.400',
+color:
+status === 'success' ? 'green.400' : status === 'error' ? 'red.400' : 'gray.400',
 })}
 >
 {status}
File diff suppressed because it is too large
@@ -534,7 +534,9 @@ export function UnifiedDataPanel({ modelType, onDataChanged }: UnifiedDataPanelP
 const handleStartSync = useCallback(async () => {
 setSyncProgress({ phase: 'connecting', message: 'Connecting...' })
 try {
-const response = await fetch(`/api/vision-training/sync?modelType=${modelType}`, { method: 'POST' })
+const response = await fetch(`/api/vision-training/sync?modelType=${modelType}`, {
+method: 'POST',
+})
 if (response.ok) {
 setSyncProgress({ phase: 'complete', message: 'Sync complete!' })
 loadItems()
@@ -1,129 +1,33 @@
 'use client'

-import { useCallback, useRef, useState } from 'react'
+import { useCallback, useEffect, useRef, useState } from 'react'
+import {
+createQuadDetector,
+type DetectedQuad as ModularDetectedQuad,
+type QuadDetectorConfig,
+} from '@/lib/vision/quadDetector'
+import { createQuadTracker, type TrackedQuad as ModularTrackedQuad } from '@/lib/vision/quadTracker'
+import type { CV, CVMat } from '@/lib/vision/opencv/types'
+
+// Re-export config type for consumers
+export type { QuadDetectorConfig } from '@/lib/vision/quadDetector'

 /**
 * Hook for document detection using OpenCV.js directly
 *
 * Features:
 * - Lazy loads OpenCV.js (~8MB) only when first used
 * - Multi-quad tracking: detects ALL quadrilaterals, not just the largest
 * - Uses modular quadDetector and quadTracker from @/lib/vision
 * - Scores quads by: size, aspect ratio, and temporal stability
 * - Filters out small quads (likely printed on page) vs page-sized quads
 * - Provides highlightDocument for drawing detected quad on overlay
 * - Provides extractDocument for cropping/deskewing captured image
 */

-// OpenCV.js types (minimal interface for what we use)
-interface CVMat {
-delete: () => void
-data32S: Int32Array
-rows: number
-cols: number
-}
-
-interface CVMatVector {
-size: () => number
-get: (i: number) => CVMat
-delete: () => void
-}
-
-interface CVSize {
-width: number
-height: number
-}
-
-interface CVPoint {
-x: number
-y: number
-}
-
-interface CV {
-Mat: new () => CVMat
-MatVector: new () => CVMatVector
-Size: new (w: number, h: number) => CVSize
-Scalar: new (r?: number, g?: number, b?: number, a?: number) => unknown
-imread: (canvas: HTMLCanvasElement) => CVMat
-imshow: (canvas: HTMLCanvasElement, mat: CVMat) => void
-cvtColor: (src: CVMat, dst: CVMat, code: number) => void
-GaussianBlur: (
-src: CVMat,
-dst: CVMat,
-size: CVSize,
-sigmaX: number,
-sigmaY: number,
-borderType: number
-) => void
-Canny: (src: CVMat, dst: CVMat, t1: number, t2: number) => void
-dilate: (src: CVMat, dst: CVMat, kernel: CVMat, anchor: CVPoint, iterations: number) => void
-findContours: (
-src: CVMat,
-contours: CVMatVector,
-hierarchy: CVMat,
-mode: number,
-method: number
-) => void
-contourArea: (contour: CVMat) => number
-arcLength: (contour: CVMat, closed: boolean) => number
-approxPolyDP: (contour: CVMat, approx: CVMat, epsilon: number, closed: boolean) => void
-getPerspectiveTransform: (src: CVMat, dst: CVMat) => CVMat
-warpPerspective: (
-src: CVMat,
-dst: CVMat,
-M: CVMat,
-size: CVSize,
-flags: number,
-borderMode: number,
-borderValue: unknown
-) => void
-warpAffine: (
-src: CVMat,
-dst: CVMat,
-M: CVMat,
-size: CVSize,
-flags?: number,
-borderMode?: number,
-borderValue?: unknown
-) => void
-getRotationMatrix2D: (center: CVPoint, angle: number, scale: number) => CVMat
-rotate: (src: CVMat, dst: CVMat, rotateCode: number) => void
-countNonZero: (src: CVMat) => number
-matFromArray: (rows: number, cols: number, type: number, data: number[]) => CVMat
-COLOR_RGBA2GRAY: number
-BORDER_DEFAULT: number
-RETR_LIST: number
-CHAIN_APPROX_SIMPLE: number
-CV_32FC2: number
-INTER_LINEAR: number
-BORDER_CONSTANT: number
-ROTATE_90_CLOCKWISE: number
-ROTATE_180: number
-ROTATE_90_COUNTERCLOCKWISE: number
-}
-
-/** Represents a detected quadrilateral with corner points */
-interface DetectedQuad {
-corners: Array<{ x: number; y: number }>
-area: number
-aspectRatio: number
-// Unique ID based on approximate center position
-centerId: string
-}
-
-/** Tracked quad candidate with history */
-interface TrackedQuad {
-id: string
-corners: Array<{ x: number; y: number }>
-area: number
-aspectRatio: number
-/** How many frames this quad has been seen */
-frameCount: number
-/** Last frame number when this quad was seen */
-lastSeenFrame: number
-/** Stability score based on corner consistency */
-stabilityScore: number
-/** History of corner positions for stability calculation */
-cornerHistory: Array<Array<{ x: number; y: number }>>
+/** Internal tracked quad type for backward compatibility */
+interface TrackedQuad extends ModularTrackedQuad {
+/** History of corner positions for stability calculation (used by extractDocument) */
+cornerHistory?: Array<Array<{ x: number; y: number }>>
 }

 export interface DocumentDetectionDebugInfo {
@@ -143,28 +47,12 @@ export interface DocumentDetectionDebugInfo {
 lastDetectionError: string | null
 }

-/** Number of frames to track quad history */
-const HISTORY_LENGTH = 10
 /** Minimum frames a quad must be seen to be considered stable */
 const MIN_FRAMES_FOR_STABLE = 3
 /** Minimum frames for "locked" state */
 const LOCKED_FRAME_COUNT = 5
-/** Maximum distance (as % of frame diagonal) for quads to be considered "same" */
-const QUAD_MATCH_THRESHOLD = 0.08
-/** Minimum area as % of frame for a quad to be considered page-sized */
-const MIN_AREA_RATIO = 0.15
-/** Maximum area as % of frame (filter out frame edges detected as quad) */
-const MAX_AREA_RATIO = 0.95
-/** Expected aspect ratios for documents (width/height) */
-const EXPECTED_ASPECT_RATIOS = [
-8.5 / 11, // US Letter portrait
-11 / 8.5, // US Letter landscape
-1 / Math.sqrt(2), // A4 portrait
-Math.sqrt(2), // A4 landscape
-1, // Square
-]
-/** How close aspect ratio must be to expected (tolerance) */
-const ASPECT_RATIO_TOLERANCE = 0.3
 /** Minimum stability score for locked state */
 const MIN_STABILITY_FOR_LOCKED = 0.5

 export interface DetectQuadsInImageResult {
 /** Whether a document quad was detected */
@@ -226,18 +114,44 @@ export interface UseDocumentDetectionReturn {
 * Returns the canvas, or null if loading failed
 */
 loadImageToCanvas: (file: File) => Promise<HTMLCanvasElement | null>
+/**
+* Reset all tracking state (call when returning from adjustment mode)
+*/
+resetTracking: () => void
+/**
+* Update detector configuration (recreates detector with new settings)
+*/
+updateDetectorConfig: (config: Partial<QuadDetectorConfig>) => void
+/**
+* Current detector configuration
+*/
+detectorConfig: Partial<QuadDetectorConfig>
 }

-export function useDocumentDetection(): UseDocumentDetectionReturn {
+export interface UseDocumentDetectionOptions {
+/** Initial detector configuration */
+detectorConfig?: Partial<QuadDetectorConfig>
+}
+
+export function useDocumentDetection(
+options?: UseDocumentDetectionOptions
+): UseDocumentDetectionReturn {
 // Start with isLoading=false since we won't load until requested
 const [isLoading, setIsLoading] = useState(false)
 const [error, setError] = useState<string | null>(null)
 const cvRef = useRef<CV | null>(null)
 const loadPromiseRef = useRef<Promise<void> | null>(null)

-// Multi-quad tracking
-const trackedQuadsRef = useRef<Map<string, TrackedQuad>>(new Map())
-const frameCountRef = useRef(0)
+// Detector configuration (can be updated dynamically)
+const [detectorConfig, setDetectorConfig] = useState<Partial<QuadDetectorConfig>>(
+options?.detectorConfig ?? {}
+)
+
+// Modular detector and tracker (created after OpenCV loads)
+const detectorRef = useRef<ReturnType<typeof createQuadDetector> | null>(null)
+const trackerRef = useRef<ReturnType<typeof createQuadTracker> | null>(null)

 // Best quad tracking
 const bestQuadRef = useRef<TrackedQuad | null>(null)
 const lastStableFrameRef = useRef<HTMLCanvasElement | null>(null)
@@ -348,6 +262,15 @@ export function useDocumentDetection(): UseDocumentDetectionReturn {

 // Store OpenCV reference
 cvRef.current = (window as unknown as { cv: CV }).cv

+// Create modular detector and tracker with current config
+detectorRef.current = createQuadDetector(cvRef.current, detectorConfig)
+trackerRef.current = createQuadTracker({
+minFramesForStable: MIN_FRAMES_FOR_STABLE,
+minFramesForLocked: LOCKED_FRAME_COUNT,
+minStabilityForLocked: MIN_STABILITY_FOR_LOCKED,
+})
+
 const loadTime = Date.now() - loadStartTimeRef.current
 setDebugInfo((prev) => ({ ...prev, loadTimeMs: loadTime }))
 setIsLoading(false)
@@ -365,7 +288,19 @@ export function useDocumentDetection(): UseDocumentDetectionReturn {
 } catch {
 return false
 }
-}, [isOpenCVReady])
+}, [isOpenCVReady, detectorConfig])
+
+// Recreate detector when config changes (if OpenCV is already loaded)
+useEffect(() => {
+if (cvRef.current && detectorRef.current) {
+detectorRef.current = createQuadDetector(cvRef.current, detectorConfig)
+}
+}, [detectorConfig])
+
+// Update detector config function
+const updateDetectorConfig = useCallback((newConfig: Partial<QuadDetectorConfig>) => {
+setDetectorConfig((prev) => ({ ...prev, ...newConfig }))
+}, [])

 // Reusable canvas for video frame capture
 const frameCanvasRef = useRef<HTMLCanvasElement | null>(null)
@@ -391,7 +326,7 @@ export function useDocumentDetection(): UseDocumentDetectionReturn {
 return frameCanvas
 }, [])

-// Calculate distance between two points
+// Calculate distance between two points (kept for extractDocument)
 const distance = useCallback(
 (p1: { x: number; y: number }, p2: { x: number; y: number }): number => {
 return Math.sqrt((p1.x - p2.x) ** 2 + (p1.y - p2.y) ** 2)
@@ -399,298 +334,6 @@ export function useDocumentDetection(): UseDocumentDetectionReturn {
 []
 )

-// Order corners: top-left, top-right, bottom-right, bottom-left
-const orderCorners = useCallback(
-(corners: Array<{ x: number; y: number }>): Array<{ x: number; y: number }> => {
-if (corners.length !== 4) return corners
-
-// Find centroid
-const cx = corners.reduce((s, c) => s + c.x, 0) / 4
-const cy = corners.reduce((s, c) => s + c.y, 0) / 4
-
-// Sort by angle from centroid
-const sorted = [...corners].sort((a, b) => {
-const angleA = Math.atan2(a.y - cy, a.x - cx)
-const angleB = Math.atan2(b.y - cy, b.x - cx)
-return angleA - angleB
-})
-
-// Find top-left (smallest x+y)
-let topLeftIdx = 0
-let minSum = Infinity
-for (let i = 0; i < 4; i++) {
-const sum = sorted[i].x + sorted[i].y
-if (sum < minSum) {
-minSum = sum
-topLeftIdx = i
-}
-}
-
-// Rotate array so top-left is first
-const ordered = []
-for (let i = 0; i < 4; i++) {
-ordered.push(sorted[(topLeftIdx + i) % 4])
-}
-
-return ordered
-},
-[]
-)
-
-// Check if aspect ratio is document-like
-const isDocumentAspectRatio = useCallback((ratio: number): boolean => {
-return EXPECTED_ASPECT_RATIOS.some(
-(expected) => Math.abs(ratio - expected) < ASPECT_RATIO_TOLERANCE
-)
-}, [])
-
-// Generate a stable ID for a quad based on its center position
-const getQuadCenterId = useCallback(
-(corners: Array<{ x: number; y: number }>, frameWidth: number, frameHeight: number): string => {
-const cx = corners.reduce((s, c) => s + c.x, 0) / 4
-const cy = corners.reduce((s, c) => s + c.y, 0) / 4
-// Quantize to grid cells (10x10 grid)
-const gridX = Math.floor((cx / frameWidth) * 10)
-const gridY = Math.floor((cy / frameHeight) * 10)
-return `${gridX},${gridY}`
-},
-[]
-)
-
-// Check if two quads are similar (same document)
-const quadsMatch = useCallback(
-(
-q1: Array<{ x: number; y: number }>,
-q2: Array<{ x: number; y: number }>,
-frameDiagonal: number
-): boolean => {
-const threshold = frameDiagonal * QUAD_MATCH_THRESHOLD
-let totalDist = 0
-for (let i = 0; i < 4; i++) {
-totalDist += distance(q1[i], q2[i])
-}
-return totalDist / 4 < threshold
-},
-[distance]
-)
-
-// Calculate corner stability (how much corners move between frames)
-const calculateCornerStability = useCallback(
-(history: Array<Array<{ x: number; y: number }>>): number => {
-if (history.length < 2) return 0
-
-let totalVariance = 0
-for (let corner = 0; corner < 4; corner++) {
-const xs = history.map((h) => h[corner].x)
-const ys = history.map((h) => h[corner].y)
-const meanX = xs.reduce((a, b) => a + b, 0) / xs.length
-const meanY = ys.reduce((a, b) => a + b, 0) / ys.length
-const varX = xs.reduce((a, b) => a + (b - meanX) ** 2, 0) / xs.length
-const varY = ys.reduce((a, b) => a + (b - meanY) ** 2, 0) / ys.length
-totalVariance += Math.sqrt(varX + varY)
-}
-
-// Convert variance to stability score (lower variance = higher stability)
-// Normalize: variance of 0 = stability 1, variance of 50+ = stability 0
-const avgVariance = totalVariance / 4
-return Math.max(0, 1 - avgVariance / 50)
-},
-[]
-)
-
-// Find all quadrilaterals in the frame using OpenCV
-const findAllQuads = useCallback(
-(frameCanvas: HTMLCanvasElement): DetectedQuad[] => {
-const cv = cvRef.current
-if (!cv) return []
-
-const quads: DetectedQuad[] = []
-const frameArea = frameCanvas.width * frameCanvas.height
-const frameDiagonal = Math.sqrt(frameCanvas.width ** 2 + frameCanvas.height ** 2)
-
-// OpenCV processing
-let src: CVMat | null = null
-let gray: CVMat | null = null
-let blurred: CVMat | null = null
-let edges: CVMat | null = null
-let contours: CVMatVector | null = null
-let hierarchy: CVMat | null = null
-
-try {
-src = cv.imread(frameCanvas)
-gray = new cv.Mat()
-blurred = new cv.Mat()
-edges = new cv.Mat()
-
-// Convert to grayscale
-cv.cvtColor(src, gray, cv.COLOR_RGBA2GRAY)
-
-// Blur to reduce noise
-cv.GaussianBlur(gray, blurred, new cv.Size(5, 5), 0, 0, cv.BORDER_DEFAULT)
-
-// Edge detection
-cv.Canny(blurred, edges, 50, 150)
-
-// Dilate edges to connect gaps
-const kernel = new cv.Mat()
-cv.dilate(edges, edges, kernel, { x: -1, y: -1 } as CVPoint, 1)
-kernel.delete()
-
-// Find contours
-contours = new cv.MatVector()
-hierarchy = new cv.Mat()
-cv.findContours(edges, contours, hierarchy, cv.RETR_LIST, cv.CHAIN_APPROX_SIMPLE)
-
-// Process each contour
-for (let i = 0; i < contours.size(); i++) {
-const contour = contours.get(i)
-const area = cv.contourArea(contour)
-const areaRatio = area / frameArea
-
-// Skip if too small or too large
-if (areaRatio < MIN_AREA_RATIO || areaRatio > MAX_AREA_RATIO) {
-continue
-}
-
-// Approximate to polygon
-const approx = new cv.Mat()
-const perimeter = cv.arcLength(contour, true)
-cv.approxPolyDP(contour, approx, 0.02 * perimeter, true)
-
-// Check if it's a quadrilateral
-if (approx.rows === 4) {
-// Extract corners
-const corners: Array<{ x: number; y: number }> = []
-for (let j = 0; j < 4; j++) {
-corners.push({
-x: approx.data32S[j * 2],
-y: approx.data32S[j * 2 + 1],
-})
-}
-
-// Order corners consistently
-const orderedCorners = orderCorners(corners)
-
-// Calculate aspect ratio
-const width = distance(orderedCorners[0], orderedCorners[1])
-const height = distance(orderedCorners[1], orderedCorners[2])
-const aspectRatio = Math.max(width, height) / Math.min(width, height)
-
-// Check if aspect ratio is document-like
-if (isDocumentAspectRatio(aspectRatio)) {
-quads.push({
-corners: orderedCorners,
-area,
-aspectRatio,
-centerId: getQuadCenterId(orderedCorners, frameCanvas.width, frameCanvas.height),
-})
-}
-}
-
-approx.delete()
-}
-} finally {
-// Clean up OpenCV memory
-src?.delete()
-gray?.delete()
-blurred?.delete()
-edges?.delete()
-contours?.delete()
-hierarchy?.delete()
-}
-
-// Sort by area (largest first)
-quads.sort((a, b) => b.area - a.area)
-
-return quads
-},
-[distance, orderCorners, isDocumentAspectRatio, getQuadCenterId]
-)
-
-// Update tracked quads with new detections
-const updateTrackedQuads = useCallback(
-(
-detectedQuads: DetectedQuad[],
-frameWidth: number,
-frameHeight: number
-): TrackedQuad | null => {
-const currentFrame = frameCountRef.current++
-const trackedQuads = trackedQuadsRef.current
-const frameDiagonal = Math.sqrt(frameWidth ** 2 + frameHeight ** 2)
-
-// Mark all tracked quads as not seen this frame
-const seenIds = new Set<string>()
-
-// Match detected quads to tracked quads
-for (const detected of detectedQuads) {
-let matched = false
-
-for (const [id, tracked] of trackedQuads) {
-if (!seenIds.has(id) && quadsMatch(detected.corners, tracked.corners, frameDiagonal)) {
-// Update existing tracked quad
-tracked.corners = detected.corners
-tracked.area = detected.area
-tracked.aspectRatio = detected.aspectRatio
-tracked.frameCount++
-tracked.lastSeenFrame = currentFrame
-tracked.cornerHistory.push(detected.corners)
-if (tracked.cornerHistory.length > HISTORY_LENGTH) {
-tracked.cornerHistory.shift()
-}
-tracked.stabilityScore = calculateCornerStability(tracked.cornerHistory)
-seenIds.add(id)
-matched = true
-break
-}
-}
-
-if (!matched) {
-// New quad - start tracking
-const newId = `quad_${currentFrame}_${Math.random().toString(36).slice(2, 8)}`
-trackedQuads.set(newId, {
-id: newId,
-corners: detected.corners,
-area: detected.area,
-aspectRatio: detected.aspectRatio,
-frameCount: 1,
-lastSeenFrame: currentFrame,
-stabilityScore: 0,
-cornerHistory: [detected.corners],
-})
-seenIds.add(newId)
-}
-}
-
-// Remove quads not seen for a while
-for (const [id, tracked] of trackedQuads) {
-if (currentFrame - tracked.lastSeenFrame > 3) {
-trackedQuads.delete(id)
-}
-}
-
-// Find best quad (highest score = frameCount * stability * area)
-let bestQuad: TrackedQuad | null = null
-let bestScore = 0
-
-for (const tracked of trackedQuads.values()) {
-// Only consider quads seen recently
-if (currentFrame - tracked.lastSeenFrame > 2) continue
-
-// Score: prioritize stability and longevity, then area
-const score = tracked.frameCount * (0.5 + tracked.stabilityScore) * Math.sqrt(tracked.area)
-
-if (score > bestScore) {
-bestScore = score
-bestQuad = tracked
-}
-}
-
-bestQuadRef.current = bestQuad
-return bestQuad
-},
-[quadsMatch, calculateCornerStability]
-)
-
 // Draw quad on overlay canvas
 const drawQuad = useCallback(
 (
@@ -725,8 +368,9 @@ export function useDocumentDetection(): UseDocumentDetectionReturn {

 const highlightDocument = useCallback(
 (video: HTMLVideoElement, overlayCanvas: HTMLCanvasElement): boolean => {
-const cv = cvRef.current
-if (!cv) return false
+const detector = detectorRef.current
+const tracker = trackerRef.current
+if (!detector || !tracker) return false

 const startTime = performance.now()
@@ -755,29 +399,32 @@ export function useDocumentDetection(): UseDocumentDetectionReturn {
 // Clear overlay
 overlayCtx.clearRect(0, 0, overlayCanvas.width, overlayCanvas.height)

-// Find all quads in this frame
-const detectedQuads = findAllQuads(frameCanvas)
+// Use modular detector
+const detectedQuads = detector.detect(frameCanvas)

-// Update tracking and get best quad
-const bestQuad = updateTrackedQuads(detectedQuads, frameCanvas.width, frameCanvas.height)
+// Use modular tracker
+const bestQuad = tracker.update(detectedQuads, {
+width: frameCanvas.width,
+height: frameCanvas.height,
+})
+const stats = tracker.getStats()

 const detectionTime = performance.now() - startTime

 // Draw all detected quads (faded) for debugging
 for (const quad of detectedQuads) {
 if (bestQuad && quad.centerId === bestQuad.id) continue
 drawQuad(overlayCtx, quad.corners, 'rgba(100, 100, 100, 0.3)', 2)
 }

 // Draw best quad with color based on stability
 if (bestQuad) {
-const isStable = bestQuad.frameCount >= MIN_FRAMES_FOR_STABLE
-const isLocked = bestQuad.frameCount >= LOCKED_FRAME_COUNT
+// Update bestQuadRef for extractDocument
+bestQuadRef.current = bestQuad

 let color: string
 let lineWidth: number

-if (isLocked && bestQuad.stabilityScore > 0.5) {
+if (bestQuad.isLocked) {
 color = 'rgba(0, 255, 100, 0.95)'
 lineWidth = 6
 // Save stable frame
@@ -788,7 +435,7 @@ export function useDocumentDetection(): UseDocumentDetectionReturn {
 lastStableFrameRef.current.height = frameCanvas.height
 const stableCtx = lastStableFrameRef.current.getContext('2d')
 stableCtx?.drawImage(frameCanvas, 0, 0)
-} else if (isStable) {
+} else if (bestQuad.isStable) {
 color = 'rgba(100, 255, 100, 0.85)'
 lineWidth = 5
 } else {
@@ -797,6 +444,8 @@ export function useDocumentDetection(): UseDocumentDetectionReturn {
 }

 drawQuad(overlayCtx, bestQuad.corners, color, lineWidth)
+} else {
+bestQuadRef.current = null
 }

 // Update debug info
@@ -804,7 +453,7 @@ export function useDocumentDetection(): UseDocumentDetectionReturn {
 ...prev,
 lastDetectionMs: Math.round(detectionTime),
 quadsDetected: detectedQuads.length,
-trackedQuads: trackedQuadsRef.current.size,
+trackedQuads: stats.trackedCount,
 bestQuadStability: bestQuad?.stabilityScore ?? 0,
 bestQuadFrameCount: bestQuad?.frameCount ?? 0,
 lastDetectionError: null,
@@ -819,7 +468,7 @@ export function useDocumentDetection(): UseDocumentDetectionReturn {
 return false
 }
 },
-[captureVideoFrame, findAllQuads, updateTrackedQuads, drawQuad]
+[captureVideoFrame, drawQuad]
 )

 /**
@@ -1085,11 +734,24 @@ export function useDocumentDetection(): UseDocumentDetectionReturn {
 [captureVideoFrame, distance, analyzeOrientation, rotateCanvas]
 )

-// Compute derived state
+// Reset tracking state (call when returning from adjustment mode)
+const resetTracking = useCallback(() => {
+trackerRef.current?.reset()
+bestQuadRef.current = null
+lastStableFrameRef.current = null
+setDebugInfo((prev) => ({
+...prev,
+quadsDetected: 0,
+trackedQuads: 0,
+bestQuadStability: 0,
+bestQuadFrameCount: 0,
+}))
+}, [])
+
+// Compute derived state (use isStable/isLocked from tracked quad)
 const bestQuad = bestQuadRef.current
-const isStable = bestQuad ? bestQuad.frameCount >= MIN_FRAMES_FOR_STABLE : false
-const isLocked =
-bestQuad && bestQuad.frameCount >= LOCKED_FRAME_COUNT && bestQuad.stabilityScore > 0.5
+const isStable = bestQuad?.isStable ?? false
+const isLocked = bestQuad?.isLocked ?? false

 // Get current best quad corners
 const getBestQuadCorners = useCallback((): Array<{
@@ -1152,59 +814,57 @@ export function useDocumentDetection(): UseDocumentDetectionReturn {
 * Detect quads in a static image (for file uploads and gallery edits)
 * Returns detected corners or fallback corners (full image)
 */
-const detectQuadsInImage = useCallback(
-(canvas: HTMLCanvasElement): DetectQuadsInImageResult => {
-// Fallback corners (full image)
-const fallbackCorners = [
-{ x: 0, y: 0 },
-{ x: canvas.width, y: 0 },
-{ x: canvas.width, y: canvas.height },
-{ x: 0, y: canvas.height },
-]
-
-if (!cvRef.current) {
-return {
-detected: false,
-corners: fallbackCorners,
-sourceCanvas: canvas,
-}
-}
-
-try {
-// Run quad detection on the canvas
-const detectedQuads = findAllQuads(canvas)
-
-if (detectedQuads.length > 0) {
-// Return the best quad (largest area, already sorted)
-return {
-detected: true,
-corners: detectedQuads[0].corners,
-sourceCanvas: canvas,
-}
-}
-
-// No quads detected - return fallback
-return {
-detected: false,
-corners: fallbackCorners,
-sourceCanvas: canvas,
-}
-} catch (err) {
-console.warn('Quad detection failed:', err)
-return {
-detected: false,
-corners: fallbackCorners,
-sourceCanvas: canvas,
-}
-}
-},
-[findAllQuads]
-)
+const detectQuadsInImage = useCallback((canvas: HTMLCanvasElement): DetectQuadsInImageResult => {
+// Fallback corners (full image)
+const fallbackCorners = [
+{ x: 0, y: 0 },
+{ x: canvas.width, y: 0 },
+{ x: canvas.width, y: canvas.height },
+{ x: 0, y: canvas.height },
+]
+
+const detector = detectorRef.current
+if (!detector) {
+return {
+detected: false,
+corners: fallbackCorners,
+sourceCanvas: canvas,
+}
+}
+
+try {
+// Use modular detector
+const detectedQuads = detector.detect(canvas)
+
+if (detectedQuads.length > 0) {
+// Return the best quad (largest area, already sorted)
+return {
+detected: true,
+corners: detectedQuads[0].corners,
+sourceCanvas: canvas,
+}
+}
+
+// No quads detected - return fallback
+return {
+detected: false,
+corners: fallbackCorners,
+sourceCanvas: canvas,
+}
+} catch (err) {
+console.warn('Quad detection failed:', err)
+return {
+detected: false,
+corners: fallbackCorners,
+sourceCanvas: canvas,
+}
+}
+}, [])

 return {
 isLoading,
 error,
-isReady: !isLoading && !error && cvRef.current !== null,
+isReady: !isLoading && !error && detectorRef.current !== null,
 ensureOpenCVLoaded,
 isStable,
 isLocked: !!isLocked,
@@ -1216,6 +876,9 @@ export function useDocumentDetection(): UseDocumentDetectionReturn {
 extractDocument,
 detectQuadsInImage,
 loadImageToCanvas,
+resetTracking,
+updateDetectorConfig,
+detectorConfig,
 }
 }
@@ -9,11 +9,7 @@

 import { useCallback, useRef, useState } from 'react'
 import type { CV } from '@/lib/vision/opencv/types'
-import {
-loadOpenCV as loadOpenCVCore,
-getOpenCV,
-isOpenCVReady,
-} from '@/lib/vision/opencv/loader'
+import { loadOpenCV as loadOpenCVCore, getOpenCV, isOpenCVReady } from '@/lib/vision/opencv/loader'

 export interface UseOpenCVReturn {
 /** OpenCV instance (null if not loaded) */
@@ -24,7 +24,12 @@ export type {
 } from '@/lib/vision/quadDetection'

 // Re-export utility functions
-export { loadImageToCanvas, captureVideoFrame, orderCorners, distance } from '@/lib/vision/quadDetection'
+export {
+loadImageToCanvas,
+captureVideoFrame,
+orderCorners,
+distance,
+} from '@/lib/vision/quadDetection'

 /**
 * React hook for quad detection in static images.
@@ -89,13 +94,10 @@ export function useQuadDetection(options?: QuadDetectionOptions) {
 * Detect quads in a canvas.
 * Returns null if detector is not loaded.
 */
-const detect = useCallback(
-(canvas: HTMLCanvasElement): QuadDetectionResult | null => {
-if (!detectorRef.current) return null
-return detectorRef.current.detect(canvas, optionsRef.current)
-},
-[]
-)
+const detect = useCallback((canvas: HTMLCanvasElement): QuadDetectionResult | null => {
+if (!detectorRef.current) return null
+return detectorRef.current.detect(canvas, optionsRef.current)
+}, [])

 /**
 * Detect quads in an image file.
@@ -155,10 +157,13 @@ export function useQuadDetection(options?: QuadDetectionOptions) {
 /**
 * Extract a quad region using perspective transform.
 */
-const extract = useCallback((canvas: HTMLCanvasElement, corners: Corner[]): HTMLCanvasElement | null => {
-if (!detectorRef.current) return null
-return detectorRef.current.extract(canvas, corners)
-}, [])
+const extract = useCallback(
+(canvas: HTMLCanvasElement, corners: Corner[]): HTMLCanvasElement | null => {
+if (!detectorRef.current) return null
+return detectorRef.current.extract(canvas, corners)
+},
+[]
+)

 /**
 * Analyze document orientation.
@@ -0,0 +1,241 @@
# Document Detection Research

Research notes on improving quad/document detection, particularly for handling finger occlusion and complex backgrounds.

**Date**: January 2026
**Context**: Current OpenCV-based quad detection struggles with finger occlusion and busy backgrounds.

---

## The Core Problem

Standard Canny edge detection fails for document scanning because:

> "The sections of text inside the document are strongly amplified, whereas the document edges—what we're interested in—show up very weakly."
> — Dropbox Engineering

Traditional CV approaches (Canny + Hough) can only work with **visible edges**. When fingers occlude document corners, the edge pixels simply aren't there.

---

## Industry Approaches

### Dropbox (2016)
**Source**: [Fast and Accurate Document Detection for Scanning](https://dropbox.tech/machine-learning/fast-and-accurate-document-detection-for-scanning)

1. **Custom ML-based edge detector** - trained to suppress text edges while keeping document boundaries (details proprietary)
2. **Hough transform** for line detection from the cleaned edge map
3. **Quadrilateral scoring** - enumerate all possible quads from line intersections, score each by summing edge probabilities along the perimeter
4. **Result**: 8-10 FPS, 60% fewer manual corrections vs Apple's SDK

Follow-up: [Improving the Responsiveness of the Document Detector](https://dropbox.tech/machine-learning/improving-the-responsiveness-of-the-document-detector)
- Motion-based quad tracking between frames
- Hybrid: full detection every ~100ms + fast tracking on intermediate frames

### Genius Scan (2024)
**Source**: [Document Detection - How Deep Learning Has Changed The Game](https://blog.thegrizzlylabs.com/2024/10/document-detection.html)

**Key insight**: Combining DL + traditional CV raised accuracy from 51% → 75% → 85%:
- DL provides **robustness** (handles occlusion, complex backgrounds)
- Traditional CV provides **precision** (sub-pixel corner refinement)

Architecture:
- MobileNet V2 backbone
- Input resolution: 96×96 pixels
- Training dataset: 1M+ images
- Pre-training: ImageNet, fine-tuned on document data
- Performance: 25+ FPS on mobile

### Scanner Pro (Readdle)
**Source**: [Inside ScannerPro: the Tech behind perfect scans](https://readdle.com/blog/scanner-pro-border-detection)

Evolution:
1. Traditional CV (Canny + Hough) - baseline
2. Semantic segmentation - too slow (5 FPS on iPhone X)
3. **Keypoint detection** - direct corner prediction, 30+ FPS

Key techniques:
- MobileNet-based keypoint detector
- Kalman filter + IMU data for temporal smoothing
- Two-stage: lightweight detector for streaming, heavier model on capture

### Academic Approaches

**Multi-document detection via corner localization**:
- Joint Corner Detector (JCD) with attention mechanism
- Coarse-to-fine: rough prediction → corner-specific refinement
- Datasets: ICDAR 2015 SmartDoc, SEECS-NUSF, MIDV-500

**Semantic segmentation** (LearnOpenCV tutorial):
- DeepLabv3 with MobileNetV3-Large backbone
- Binary segmentation (document vs background)
- Trained on synthetic data with augmentation
- Extract corners from mask via contour detection

---

## Modern Architecture Pattern

The recommended approach for robust document detection:

```
Input Image (downscaled to 96-256px)
          ↓
┌───────────────────┐
│  MobileNetV2/V3   │ ← Pretrained on ImageNet
│ Feature Extractor │
└─────────┬─────────┘
          ↓
┌───────────────────┐
│  Regression Head  │ ← 8 outputs (x,y for 4 corners)
│ (or Heatmap Head) │
└─────────┬─────────┘
          ↓
  Corner Coordinates
          ↓
┌───────────────────┐
│   Optional: CV    │ ← Sub-pixel refinement
│    Refinement     │
└───────────────────┘
```

**Why this works for occlusion**: The network learns document shape priors and can predict where corners *should* be even when they're not visible.
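
A minimal sketch of what consuming such a model from the browser could look like, using `onnxruntime-web`. The model file, its input/output names (`input`, `corners`), and the 1×3×96×96 layout are all assumptions for illustration; no such model exists in this repo yet:

```typescript
import * as ort from 'onnxruntime-web'

// Hypothetical MobileNet-backbone corner regressor exported to ONNX.
const session = await ort.InferenceSession.create('/models/corner-detector.onnx')

async function predictCorners(canvas: HTMLCanvasElement): Promise<Array<{ x: number; y: number }>> {
  // Downscale to the assumed 96×96 RGB input resolution.
  const small = document.createElement('canvas')
  small.width = 96
  small.height = 96
  small.getContext('2d')!.drawImage(canvas, 0, 0, 96, 96)
  const { data } = small.getContext('2d')!.getImageData(0, 0, 96, 96)

  // HWC RGBA bytes → CHW float32 in [0, 1].
  const chw = new Float32Array(3 * 96 * 96)
  for (let i = 0; i < 96 * 96; i++) {
    chw[i] = data[i * 4] / 255
    chw[96 * 96 + i] = data[i * 4 + 1] / 255
    chw[2 * 96 * 96 + i] = data[i * 4 + 2] / 255
  }

  const input = new ort.Tensor('float32', chw, [1, 3, 96, 96])
  const output = await session.run({ input })
  // 'corners' is an assumed output name: 8 values in [0, 1].
  const coords = output.corners.data as Float32Array

  // Scale normalized predictions back to source-canvas pixels.
  return [0, 1, 2, 3].map((i) => ({
    x: coords[i * 2] * canvas.width,
    y: coords[i * 2 + 1] * canvas.height,
  }))
}
```

The predicted corners could then feed the existing CV refinement and tracking stages unchanged.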

---

## Hugging Face Models

### Document-Specific

**[ordaktaktak/Document-Scanner](https://huggingface.co/ordaktaktak/Document-Scanner)**
- Architecture: U-Net semantic segmentation
- Input: Grayscale 256×256
- Output: Binary mask → extract corners via contour detection
- Framework: PyTorch (.pth weights)
- Status: Would need conversion to ONNX/TF.js

### Background Removal (Could Adapt)

**[briaai/RMBG-2.0](https://huggingface.co/briaai/RMBG-2.0)**
- Architecture: BiRefNet (0.2B params) - too large for real-time
- Input: 1024×1024 RGB
- Output: Alpha matte
- Transformers.js compatible
- License: CC BY-NC 4.0 (non-commercial)

**[briaai/RMBG-1.4](https://huggingface.co/briaai/RMBG-1.4)**
- Smaller version (44.1M params)
- Same approach, might be more practical

### General Segmentation (Transformers.js Ready)

| Model | Size | Use Case |
|-------|------|----------|
| `Xenova/deeplabv3-mobilevit-xx-small` | Tiny | Fast, low accuracy |
| `Xenova/deeplabv3-mobilevit-small` | Small | Balanced |
| `Xenova/deeplabv3-mobilevit-x-small` | X-Small | Middle ground |
| `nnny/onnx-mobile-sam` | ~5MB | General segmentation with prompts |

### SAM-based Approach

Could use Segment Anything Model with point prompts:
1. User taps roughly in document area
2. SAM segments the document
3. Extract corners from segmentation mask (sketched below)

Models: `nnny/onnx-mobile-sam`, SlimSAM variants
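
A minimal sketch of step 3, using only the OpenCV.js calls already declared in this repo's `opencv/types` (including the `RETR_EXTERNAL` flag this commit adds). It assumes the mask has been rendered onto a canvas; real masks are noisier, so the clean 4-point collapse is an assumption:

```typescript
import type { CV, CVMat } from '@/lib/vision/opencv/types'

// Given a binary document mask rendered to a canvas, recover four corners.
function cornersFromMask(cv: CV, maskCanvas: HTMLCanvasElement): Array<{ x: number; y: number }> | null {
  const src = cv.imread(maskCanvas)
  const gray = new cv.Mat()
  const contours = new cv.MatVector()
  const hierarchy = new cv.Mat()
  const approx = new cv.Mat()
  try {
    cv.cvtColor(src, gray, cv.COLOR_RGBA2GRAY)
    cv.findContours(gray, contours, hierarchy, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE)

    // The largest external contour should be the document blob.
    let largest: CVMat | null = null
    let largestArea = 0
    for (let i = 0; i < contours.size(); i++) {
      const area = cv.contourArea(contours.get(i))
      if (area > largestArea) {
        largestArea = area
        largest = contours.get(i)
      }
    }
    if (!largest) return null

    // Approximate the contour; a clean quad mask collapses to 4 points.
    cv.approxPolyDP(largest, approx, 0.02 * cv.arcLength(largest, true), true)
    if (approx.rows !== 4) return null

    const corners: Array<{ x: number; y: number }> = []
    for (let j = 0; j < 4; j++) {
      corners.push({ x: approx.data32S[j * 2], y: approx.data32S[j * 2 + 1] })
    }
    return corners
  } finally {
    src.delete()
    gray.delete()
    contours.delete()
    hierarchy.delete()
    approx.delete()
  }
}
```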

---

## Implementation Options

### Option 1: Convert Document-Scanner to ONNX

```python
# Download PyTorch model
# Convert with torch.onnx.export()
# Use with ONNX Runtime Web

import torch

model = load_document_scanner_model()
dummy_input = torch.randn(1, 1, 256, 256)  # grayscale
torch.onnx.export(model, dummy_input, "document_scanner.onnx")
```

Pros: Purpose-built for documents
Cons: Still outputs mask, need CV for corners

### Option 2: SAM with Point Prompts

```typescript
import { pipeline } from '@huggingface/transformers';

const segmenter = await pipeline('image-segmentation', 'nnny/onnx-mobile-sam');
const result = await segmenter(image, { points: [[centerX, centerY]] });
// Extract corners from mask
```

Pros: No training needed, handles complex shapes
Cons: Requires user interaction (point prompt)

### Option 3: Train Custom Corner Detector

Training a lightweight model specifically for corner prediction:

1. **Architecture**: MobileNetV2 → 8-output regression (4 corners × 2 coords)
2. **Training data**:
   - SmartDoc dataset (ICDAR)
   - DocVQA documents on backgrounds
   - Synthetic: random quads with augmentation
   - **Critical**: Include finger occlusion augmentation
3. **Output**: Normalized corner coordinates [0,1] (a training-objective sketch follows below)
4. **Export**: TensorFlow.js or ONNX

This is what Dropbox, Genius Scan, and Scanner Pro actually do.
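
To make the objective concrete, here is a toy TensorFlow.js sketch. The conv stack is a stand-in for the MobileNetV2 backbone (a real version would transfer-learn from pretrained features), and every layer size here is an illustrative assumption:

```typescript
import * as tf from '@tensorflow/tfjs'

// Toy stand-in for the MobileNet backbone + 8-output regression head.
const model = tf.sequential()
model.add(tf.layers.conv2d({ inputShape: [96, 96, 3], filters: 16, kernelSize: 3, strides: 2, activation: 'relu' }))
model.add(tf.layers.conv2d({ filters: 32, kernelSize: 3, strides: 2, activation: 'relu' }))
model.add(tf.layers.globalAveragePooling2d({}))
// 4 corners × (x, y), sigmoid keeps predictions normalized to [0, 1].
model.add(tf.layers.dense({ units: 8, activation: 'sigmoid' }))

// MSE on normalized corner coordinates is the usual regression loss.
model.compile({ optimizer: tf.train.adam(1e-3), loss: 'meanSquaredError' })

// xs: [N, 96, 96, 3] images; ys: [N, 8] normalized corners. Labels would come
// from SmartDoc-style annotations plus synthetic finger-occlusion augmentation.
async function train(xs: tf.Tensor4D, ys: tf.Tensor2D) {
  await model.fit(xs, ys, { epochs: 10, batchSize: 32 })
}
```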

### Option 4: Hybrid (Recommended for Production)

1. **Primary**: Lightweight CNN corner predictor (handles occlusion)
2. **Refinement**: Traditional CV on predicted region (sub-pixel accuracy)
3. **Tracking**: Kalman filter for temporal stability (a smoothing sketch follows below)
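
A minimal per-coordinate sketch of the tracking idea: a 1D constant-position Kalman filter applied to each corner coordinate independently. The noise parameters are illustrative, and production trackers like Scanner Pro's also fold in IMU data:

```typescript
// 1D constant-position Kalman filter: smooths one corner coordinate over time.
class ScalarKalman {
  private x = 0 // estimated value
  private p = 1 // estimate variance
  private initialized = false
  constructor(
    private q = 0.01, // process noise: how fast the true value can drift
    private r = 4 // measurement noise: jitter of raw detections (px²)
  ) {}

  update(measurement: number): number {
    if (!this.initialized) {
      this.x = measurement
      this.initialized = true
      return this.x
    }
    this.p += this.q // predict: uncertainty grows between frames
    const k = this.p / (this.p + this.r) // Kalman gain
    this.x += k * (measurement - this.x) // correct toward the measurement
    this.p *= 1 - k
    return this.x
  }
}

// One filter per corner coordinate (4 corners × x/y).
const filters = Array.from({ length: 8 }, () => new ScalarKalman())

function smoothCorners(corners: Array<{ x: number; y: number }>) {
  return corners.map((c, i) => ({
    x: filters[i * 2].update(c.x),
    y: filters[i * 2 + 1].update(c.y),
  }))
}
```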

---

## Datasets

| Dataset | Size | Notes |
|---------|------|-------|
| SmartDoc (ICDAR 2015) | 4,260 images | Competition dataset, labeled corners |
| MIDV-500 | 500 video clips | ID documents, challenging conditions |
| DocVQA | 50K+ images | Document images (need corner labels) |
| Synthetic | Unlimited | Generate documents on backgrounds |

---

## Key Takeaways

1. **Traditional CV (our current approach) will always struggle with occlusion** - it can only see visible edges

2. **The industry solution is learned corner prediction** - networks trained on documents learn shape priors

3. **Hybrid approaches work best** - DL for robustness, CV for precision

4. **No ready-to-use model exists** on Hugging Face that:
   - Is specifically trained for document corners
   - Handles finger occlusion
   - Is already in TF.js/ONNX format

5. **Realistic path forward**:
   - Short term: Try SAM with prompts, or convert Document-Scanner to ONNX
   - Long term: Train custom MobileNet-based corner detector

---

## References

- [Dropbox: Fast and Accurate Document Detection](https://dropbox.tech/machine-learning/fast-and-accurate-document-detection-for-scanning)
- [Dropbox: Improving Responsiveness](https://dropbox.tech/machine-learning/improving-the-responsiveness-of-the-document-detector)
- [Genius Scan: Document Detection with Deep Learning](https://blog.thegrizzlylabs.com/2024/10/document-detection.html)
- [Scanner Pro: Tech Behind Perfect Scans](https://readdle.com/blog/scanner-pro-border-detection)
- [LearnOpenCV: Document Segmentation with DeepLabv3](https://learnopencv.com/deep-learning-based-document-segmentation-using-semantic-segmentation-deeplabv3-on-custom-dataset/)
- [Transformers.js Documentation](https://huggingface.co/docs/transformers.js/en/index)
- [U-Net Paper (arXiv 1505.04597)](https://arxiv.org/abs/1505.04597)
@@ -6,7 +6,7 @@
 */
 export async function simpleDelay(ms: number): Promise<string> {
 console.log('[simpleAsync] simpleDelay called with', ms)
-await new Promise(resolve => setTimeout(resolve, ms))
+await new Promise((resolve) => setTimeout(resolve, ms))
 console.log('[simpleAsync] delay complete')
 return 'done'
 }
@@ -7,7 +7,9 @@

 export interface CVMat {
 delete: () => void
+copyTo: (dst: CVMat) => void
 data32S: Int32Array
+data32F: Float32Array
 data: ArrayBuffer
 rows: number
 cols: number
@@ -46,7 +48,48 @@ export interface CV {
 borderType: number
 ) => void
 Canny: (src: CVMat, dst: CVMat, t1: number, t2: number) => void
+Sobel: (src: CVMat, dst: CVMat, ddepth: number, dx: number, dy: number, ksize?: number) => void
+addWeighted: (
+src1: CVMat,
+alpha: number,
+src2: CVMat,
+beta: number,
+gamma: number,
+dst: CVMat
+) => void
+convertScaleAbs: (src: CVMat, dst: CVMat, alpha?: number, beta?: number) => void
+equalizeHist: (src: CVMat, dst: CVMat) => void
+adaptiveThreshold: (
+src: CVMat,
+dst: CVMat,
+maxValue: number,
+adaptiveMethod: number,
+thresholdType: number,
+blockSize: number,
+C: number
+) => void
+threshold: (src: CVMat, dst: CVMat, thresh: number, maxval: number, type: number) => number
+bilateralFilter: (
+src: CVMat,
+dst: CVMat,
+d: number,
+sigmaColor: number,
+sigmaSpace: number,
+borderType?: number
+) => void
+morphologyEx: (
+src: CVMat,
+dst: CVMat,
+op: number,
+kernel: CVMat,
+anchor?: CVPoint,
+iterations?: number
+) => void
+getStructuringElement: (shape: number, ksize: CVSize, anchor?: CVPoint) => CVMat
+erode: (src: CVMat, dst: CVMat, kernel: CVMat, anchor?: CVPoint, iterations?: number) => void
 dilate: (src: CVMat, dst: CVMat, kernel: CVMat, anchor: CVPoint, iterations: number) => void
+bitwise_or: (src1: CVMat, src2: CVMat, dst: CVMat) => void
+bitwise_and: (src1: CVMat, src2: CVMat, dst: CVMat) => void
 findContours: (
 src: CVMat,
 contours: CVMatVector,
@@ -57,6 +100,17 @@ export interface CV {
 contourArea: (contour: CVMat) => number
 arcLength: (contour: CVMat, closed: boolean) => number
 approxPolyDP: (contour: CVMat, approx: CVMat, epsilon: number, closed: boolean) => void
+convexHull: (src: CVMat, dst: CVMat, clockwise: boolean, returnPoints: boolean) => void
+// Hough line detection
+HoughLinesP: (
+src: CVMat,
+lines: CVMat,
+rho: number,
+theta: number,
+threshold: number,
+minLineLength?: number,
+maxLineGap?: number
+) => void
 getPerspectiveTransform: (src: CVMat, dst: CVMat) => CVMat
 warpPerspective: (
 src: CVMat,
@ -72,13 +126,34 @@ export interface CV
  COLOR_RGBA2GRAY: number
  BORDER_DEFAULT: number
  RETR_LIST: number
  RETR_EXTERNAL: number
  CHAIN_APPROX_SIMPLE: number
  CV_32FC2: number
  CV_32SC4: number
  CV_8U: number
  CV_16S: number
  CV_64F: number
  INTER_LINEAR: number
  BORDER_CONSTANT: number
  ROTATE_90_CLOCKWISE: number
  ROTATE_180: number
  ROTATE_90_COUNTERCLOCKWISE: number
  // Threshold types
  THRESH_BINARY: number
  THRESH_BINARY_INV: number
  THRESH_OTSU: number
  // Adaptive threshold methods
  ADAPTIVE_THRESH_MEAN_C: number
  ADAPTIVE_THRESH_GAUSSIAN_C: number
  // Morphological operations
  MORPH_RECT: number
  MORPH_ELLIPSE: number
  MORPH_CROSS: number
  MORPH_OPEN: number
  MORPH_CLOSE: number
  MORPH_GRADIENT: number
  MORPH_DILATE: number
  MORPH_ERODE: number
}

/**
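Since the quadDetector implementation diff is suppressed below, here is a minimal hedged sketch of how the newly typed calls could compose into an edge map for Hough-based quad detection. All thresholds are illustrative, and `cv.imread`, `cv.Mat`, and `cv.Size` are assumed from stock OpenCV.js rather than from the typed `CV` interface above.

```ts
// Minimal sketch: combine Canny edges with a morphological gradient ("multi"
// strategy), then extract line segments with HoughLinesP. Not the committed
// detector logic — an illustration of the typed surface only.
declare const cv: any // in app code, the loaded OpenCV.js module

function edgeMapForHough(canvas: HTMLCanvasElement): void {
  const src = cv.imread(canvas)
  const gray = new cv.Mat()
  cv.cvtColor(src, gray, cv.COLOR_RGBA2GRAY)

  // Strategy 1: plain Canny edges
  const canny = new cv.Mat()
  cv.Canny(gray, canny, 50, 150)

  // Strategy 2: morphological gradient, binarized with Otsu
  const kernel = cv.getStructuringElement(cv.MORPH_RECT, new cv.Size(3, 3))
  const grad = new cv.Mat()
  cv.morphologyEx(gray, grad, cv.MORPH_GRADIENT, kernel)
  const gradBin = new cv.Mat()
  cv.threshold(grad, gradBin, 0, 255, cv.THRESH_BINARY + cv.THRESH_OTSU)

  // "multi": OR the strategies into one edge map
  const edges = new cv.Mat()
  cv.bitwise_or(canny, gradBin, edges)

  // Hough segments: each detected line is an int32 [x1, y1, x2, y2] row,
  // which is why the CVMat type above exposes data32S and CV_32SC4
  const lines = new cv.Mat()
  cv.HoughLinesP(edges, lines, 1, Math.PI / 180, 80, 50, 10)
  for (let i = 0; i < lines.rows; i++) {
    const [x1, y1, x2, y2] = lines.data32S.subarray(i * 4, i * 4 + 4)
    console.log(`segment ${i}: (${x1},${y1}) -> (${x2},${y2})`)
  }

  // OpenCV.js mats live on the WASM heap; free them explicitly
  for (const m of [src, gray, canny, kernel, grad, gradBin, edges, lines]) m.delete()
}
```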
File diff suppressed because it is too large
@ -0,0 +1,266 @@
'use client'

/**
 * useQuadDetection Hook
 *
 * A React hook that combines OpenCV loading, quad detection, and temporal tracking.
 * Provides a clean API for detecting quadrilaterals in both camera feeds and static images.
 *
 * Usage:
 * ```tsx
 * import { OpenCvProvider } from 'opencv-react'
 * import { useQuadDetection } from '@/lib/vision/useQuadDetection'
 *
 * // Wrap your app/page with OpenCvProvider
 * <OpenCvProvider>
 *   <MyComponent />
 * </OpenCvProvider>
 *
 * // In your component:
 * function MyComponent() {
 *   const {
 *     isReady,
 *     detectInImage,
 *     processFrame,
 *     trackedQuad,
 *     stats,
 *     resetTracking,
 *   } = useQuadDetection()
 *
 *   // For static images:
 *   const quads = detectInImage(canvas)
 *
 *   // For camera feeds (call each frame):
 *   const bestQuad = processFrame(videoFrame)
 * }
 * ```
 */

import { useCallback, useMemo, useRef } from 'react'
import { useOpenCv } from 'opencv-react'
import {
  createQuadDetector,
  type DetectedQuad,
  type DebugPolygon,
  type QuadDetectorConfig,
} from './quadDetector'
import { createQuadTracker, type TrackedQuad, type QuadTrackerConfig } from './quadTracker'
import type { CV } from './opencv/types'

// Re-export types for convenience
export type { DetectedQuad, Point, DebugPolygon } from './quadDetector'
export type { TrackedQuad } from './quadTracker'
export type { QuadDetectorConfig } from './quadDetector'
export type { QuadTrackerConfig } from './quadTracker'

/** Configuration for useQuadDetection */
export interface UseQuadDetectionConfig {
  /** Quad detector configuration */
  detector?: Partial<QuadDetectorConfig>
  /** Quad tracker configuration */
  tracker?: Partial<QuadTrackerConfig>
}

/** Stats returned by the hook */
export interface QuadDetectionStats {
  /** Number of quads currently being tracked */
  trackedCount: number
  /** Total frames processed */
  frameCount: number
  /** Stability score of the best quad (0-1) */
  bestStability: number
  /** Frame count of the best quad */
  bestFrameCount: number
}

/** Result from processing a single frame */
export interface FrameProcessingResult {
  /** Best tracked quad, or null if none */
  trackedQuad: TrackedQuad | null
  /** All quads detected in this frame (before tracking) */
  detectedQuads: DetectedQuad[]
  /** Current tracking statistics */
  stats: QuadDetectionStats
}

/** Return type of useQuadDetection */
export interface UseQuadDetectionReturn {
  /** Whether OpenCV is loaded and detector is ready */
  isReady: boolean
  /** Whether OpenCV is currently loading */
  isLoading: boolean
  /** Error message if loading failed */
  error: string | null

  /**
   * Detect quads in a static image (one-shot, no tracking)
   * @param source - Canvas to detect in
   * @returns Array of detected quads, sorted by area (largest first)
   */
  detectInImage: (source: HTMLCanvasElement) => DetectedQuad[]

  /**
   * Detect quads with debug info about all candidate polygons.
   * Use this to understand why detection is failing.
   * @param source - Canvas to detect in
   * @returns Quads and debug info about all candidates
   */
  detectWithDebug: (source: HTMLCanvasElement) => {
    quads: DetectedQuad[]
    debugPolygons: DebugPolygon[]
  }

  /**
   * Process a video frame with tracking.
   * Call this each frame for camera/video feeds.
   * @param source - Canvas from video frame
   * @param frameSize - Optional explicit frame size (inferred from source if not provided)
   * @returns Frame processing result with tracked quad, detected quads, and stats
   */
  processFrame: (
    source: HTMLCanvasElement,
    frameSize?: { width: number; height: number }
  ) => FrameProcessingResult

  /** The current best tracked quad */
  trackedQuad: TrackedQuad | null

  /** All currently tracked quads */
  allTrackedQuads: TrackedQuad[]

  /** Current tracking statistics */
  stats: QuadDetectionStats

  /** Reset all tracking state (call when switching cameras, etc.) */
  resetTracking: () => void
}

/**
 * React hook for quad detection with optional temporal tracking.
 *
 * Must be used inside an OpenCvProvider from 'opencv-react'.
 *
 * @param config - Optional configuration for detector and tracker
 */
export function useQuadDetection(config?: UseQuadDetectionConfig): UseQuadDetectionReturn {
  const { loaded: opencvLoaded, cv } = useOpenCv()

  // Track the current best quad in a ref for synchronous access
  const trackedQuadRef = useRef<TrackedQuad | null>(null)
  const allTrackedRef = useRef<TrackedQuad[]>([])
  const statsRef = useRef<QuadDetectionStats>({
    trackedCount: 0,
    frameCount: 0,
    bestStability: 0,
    bestFrameCount: 0,
  })

  // Create detector when cv is available
  const detector = useMemo(() => {
    if (!opencvLoaded || !cv) return null
    try {
      return createQuadDetector(cv as CV, config?.detector)
    } catch (err) {
      console.error('[useQuadDetection] Failed to create detector:', err)
      return null
    }
  }, [opencvLoaded, cv, config?.detector])

  // Create tracker (doesn't need cv)
  const tracker = useMemo(() => createQuadTracker(config?.tracker), [config?.tracker])

  // Detect in static image (no tracking)
  const detectInImage = useCallback(
    (source: HTMLCanvasElement): DetectedQuad[] => {
      if (!detector) {
        console.warn('[useQuadDetection] detectInImage called before detector ready')
        return []
      }
      return detector.detect(source)
    },
    [detector]
  )

  // Detect with debug info (for debugging detection issues)
  const detectWithDebug = useCallback(
    (source: HTMLCanvasElement): { quads: DetectedQuad[]; debugPolygons: DebugPolygon[] } => {
      if (!detector) {
        console.warn('[useQuadDetection] detectWithDebug called before detector ready')
        return { quads: [], debugPolygons: [] }
      }
      return detector.detectWithDebug(source)
    },
    [detector]
  )

  // Process video frame with tracking
  const processFrame = useCallback(
    (
      source: HTMLCanvasElement,
      frameSize?: { width: number; height: number }
    ): FrameProcessingResult => {
      if (!detector) {
        return {
          trackedQuad: null,
          detectedQuads: [],
          stats: statsRef.current,
        }
      }

      // Detect quads in frame
      const quads = detector.detect(source)

      // Determine frame size
      const size = frameSize ?? {
        width: source.width,
        height: source.height,
      }

      // Update tracker
      const bestQuad = tracker.update(quads, size)
      const currentStats = tracker.getStats()
      const allTracked = tracker.getAllTracked()

      // Update refs
      trackedQuadRef.current = bestQuad
      allTrackedRef.current = allTracked
      statsRef.current = currentStats

      return {
        trackedQuad: bestQuad,
        detectedQuads: quads,
        stats: currentStats,
      }
    },
    [detector, tracker]
  )

  // Reset tracking
  const resetTracking = useCallback(() => {
    tracker.reset()
    trackedQuadRef.current = null
    allTrackedRef.current = []
    statsRef.current = {
      trackedCount: 0,
      frameCount: 0,
      bestStability: 0,
      bestFrameCount: 0,
    }
  }, [tracker])

  return {
    isReady: !!detector,
    isLoading: !opencvLoaded,
    error: null, // opencv-react doesn't expose errors directly

    detectInImage,
    detectWithDebug,
    processFrame,

    trackedQuad: trackedQuadRef.current,
    allTrackedQuads: allTrackedRef.current,
    stats: statsRef.current,

    resetTracking,
  }
}
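A hedged sketch of one way to drive `processFrame` from a camera feed follows. The component, refs, and the 0.8 stability threshold are assumptions for illustration, not part of the committed code; it would need to render inside an `OpenCvProvider`. Because `trackedQuad` and `stats` are surfaced via refs, a consumer running its own requestAnimationFrame loop should read the values returned by `processFrame` rather than rely on re-renders.

```tsx
'use client'

import { useEffect, useRef } from 'react'
import { useQuadDetection } from '@/lib/vision/useQuadDetection'

export function CameraQuadOverlay() {
  const videoRef = useRef<HTMLVideoElement>(null)
  const canvasRef = useRef<HTMLCanvasElement>(null)
  const { isReady, processFrame, resetTracking } = useQuadDetection()

  useEffect(() => {
    if (!isReady) return
    let rafId = 0

    const tick = () => {
      const video = videoRef.current
      const canvas = canvasRef.current
      if (video && canvas && video.readyState >= 2) {
        // processFrame takes a canvas, so copy the current video frame into one
        canvas.width = video.videoWidth
        canvas.height = video.videoHeight
        canvas.getContext('2d')?.drawImage(video, 0, 0)

        const { trackedQuad, stats } = processFrame(canvas)
        if (trackedQuad && stats.bestStability > 0.8) {
          // Stable lock (threshold is arbitrary): highlight or auto-capture here
        }
      }
      rafId = requestAnimationFrame(tick)
    }

    rafId = requestAnimationFrame(tick)
    return () => {
      cancelAnimationFrame(rafId)
      resetTracking() // clear temporal state when the feed unmounts
    }
  }, [isReady, processFrame, resetTracking])

  return (
    <>
      <video ref={videoRef} autoPlay playsInline muted />
      <canvas ref={canvasRef} style={{ display: 'none' }} />
    </>
  )
}
```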