feat(vision): integrate ML classifier for real-time abacus detection

- Enable auto-detection in DockedVisionFeed using ML column classifier
- Replace CV-based detection with useColumnClassifier hook
- Add concurrent inference prevention with isInferringRef
- Show model loading status in detection overlay
- Add detectedPrefixIndex prop to VerticalProblem for visual feedback
- Display ✓ checkmarks on terms completed via vision detection
- Connect vision detection to term tracking and auto-submit flow

The vision system now:
1. Detects abacus values via ML classification at 10fps
2. Shows visual feedback (checkmarks) as terms are completed
3. Triggers help mode when prefix sums are detected
4. Auto-submits when the correct answer is shown

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Thomas Hallock 2026-01-07 18:36:52 -06:00
parent 71c5321431
commit fb57d1f2ef
3 changed files with 135 additions and 65 deletions

View File

@ -1882,6 +1882,12 @@ export function ActiveSession({
: undefined
}
rejectedDigit={attempt.rejectedDigit}
detectedPrefixIndex={
// Show vision feedback for prefix sums (not final answer)
matchedPrefixIndex >= 0 && matchedPrefixIndex < prefixSums.length - 1
? matchedPrefixIndex
: undefined
}
helpOverlay={
// Always render overlay when in help mode (for exit transition)
showHelpOverlay && helpContext ? (

View File

@ -40,6 +40,8 @@ interface VerticalProblemProps {
generationTrace?: GenerationTrace
/** Complexity budget constraint (for debug overlay) */
complexityBudget?: number
/** Index of detected prefix sum from vision (shows visual indicator on completed terms) */
detectedPrefixIndex?: number
}
/**
@ -68,6 +70,7 @@ export function VerticalProblem({
answerFadingOut = false,
generationTrace,
complexityBudget,
detectedPrefixIndex,
}: VerticalProblemProps) {
const { resolvedTheme } = useTheme()
const isDark = resolvedTheme === 'dark'
@ -163,12 +166,25 @@ export function VerticalProblem({
const showNeedHelp = index === needHelpTermIndex && !isCurrentHelp
// Check if this term is already included in the prefix sum (when in help mode)
const isInPrefixSum = currentHelpTermIndex !== undefined && index < currentHelpTermIndex
// Check if this term is completed based on vision detection (before help mode)
const isVisionCompleted =
detectedPrefixIndex !== undefined &&
currentHelpTermIndex === undefined && // Only show when NOT in help mode
index <= detectedPrefixIndex
return (
<div
key={index}
data-element="term-row"
data-term-status={isCurrentHelp ? 'current' : showNeedHelp ? 'need-help' : 'pending'}
data-term-status={
isCurrentHelp
? 'current'
: showNeedHelp
? 'need-help'
: isVisionCompleted
? 'vision-completed'
: 'pending'
}
className={css({
display: 'flex',
alignItems: 'center',
@ -232,6 +248,22 @@ export function VerticalProblem({
</div>
)}
{/* Checkmark indicator for vision-detected completed terms */}
{isVisionCompleted && (
<div
data-element="vision-completed"
className={css({
position: 'absolute',
left: '-2rem',
color: isDark ? 'green.400' : 'green.600',
fontSize: '1.25rem',
opacity: 0.8,
})}
>
</div>
)}
{/* Operator column (only show minus for negative) */}
<div
data-element="operator"

View File

@ -5,6 +5,8 @@ import { useMyAbacus } from '@/contexts/MyAbacusContext'
import { useFrameStability } from '@/hooks/useFrameStability'
import { useMarkerDetection } from '@/hooks/useMarkerDetection'
import { useRemoteCameraDesktop } from '@/hooks/useRemoteCameraDesktop'
import { useColumnClassifier } from '@/hooks/useColumnClassifier'
import { processVideoFrame, processImageFrame, digitsToNumber } from '@/lib/vision/frameProcessor'
import { VisionCameraFeed } from './VisionCameraFeed'
import { css } from '../../../styled-system/css'
@ -12,7 +14,7 @@ import { css } from '../../../styled-system/css'
* Feature flag: Enable automatic abacus value detection from video feed.
*
* When enabled:
* - Runs CV-based bead detection on video frames
* - Runs ML-based digit classification on video frames
* - Shows detected value overlay
* - Calls setDockedValue and onValueDetected with detected values
*
@ -20,30 +22,8 @@ import { css } from '../../../styled-system/css'
* - Only shows the video feed (no detection)
* - Hides the detection overlay
* - Does not interfere with student's manual input
*
* Set to true when ready to work on improving detection accuracy.
*/
const ENABLE_AUTO_DETECTION = false
// Only import detection modules when auto-detection is enabled
// This ensures the detection code is tree-shaken when disabled
let analyzeColumns: typeof import('@/lib/vision/beadDetector').analyzeColumns
let analysesToDigits: typeof import('@/lib/vision/beadDetector').analysesToDigits
let digitsToNumber: typeof import('@/lib/vision/beadDetector').digitsToNumber
let processVideoFrame: typeof import('@/lib/vision/frameProcessor').processVideoFrame
let processImageFrame: typeof import('@/lib/vision/frameProcessor').processImageFrame
if (ENABLE_AUTO_DETECTION) {
// eslint-disable-next-line @typescript-eslint/no-require-imports
const beadDetector = require('@/lib/vision/beadDetector')
// eslint-disable-next-line @typescript-eslint/no-require-imports
const frameProcessor = require('@/lib/vision/frameProcessor')
analyzeColumns = beadDetector.analyzeColumns
analysesToDigits = beadDetector.analysesToDigits
digitsToNumber = beadDetector.digitsToNumber
processVideoFrame = frameProcessor.processVideoFrame
processImageFrame = frameProcessor.processImageFrame
}
const ENABLE_AUTO_DETECTION = true
interface DockedVisionFeedProps {
/** Called when a stable value is detected */
@ -76,6 +56,7 @@ export function DockedVisionFeed({ onValueDetected, columnCount = 5 }: DockedVis
const animationFrameRef = useRef<number | null>(null)
const lastInferenceTimeRef = useRef<number>(0)
const lastBroadcastTimeRef = useRef<number>(0)
const isInferringRef = useRef(false) // Prevent concurrent inference
const [videoStream, setVideoStream] = useState<MediaStream | null>(null)
const [error, setError] = useState<string | null>(null)
@ -86,6 +67,16 @@ export function DockedVisionFeed({ onValueDetected, columnCount = 5 }: DockedVis
// Track video element in state for marker detection hook
const [videoElement, setVideoElement] = useState<HTMLVideoElement | null>(null)
// ML column classifier hook
const classifier = useColumnClassifier()
// Preload the ML model when component mounts
useEffect(() => {
if (ENABLE_AUTO_DETECTION) {
classifier.preload()
}
}, [classifier])
// Stability tracking for detected values (hook must be called unconditionally)
const stability = useFrameStability()
@ -225,9 +216,11 @@ export function DockedVisionFeed({ onValueDetected, columnCount = 5 }: DockedVis
}, [isRemoteCamera, remoteIsPhoneConnected])
// Process local camera frames for detection (only when enabled)
const processLocalFrame = useCallback(() => {
// Skip detection when feature is disabled
const processLocalFrame = useCallback(async () => {
// Skip detection when feature is disabled or model not ready
if (!ENABLE_AUTO_DETECTION) return
if (!classifier.isModelLoaded) return
if (isInferringRef.current) return // Skip if already inferring
const now = performance.now()
if (now - lastInferenceTimeRef.current < INFERENCE_INTERVAL_MS) {
@ -239,25 +232,36 @@ export function DockedVisionFeed({ onValueDetected, columnCount = 5 }: DockedVis
if (!video || video.readyState < 2) return
if (!visionConfig.calibration) return
// Process video frame into column strips
const columnImages = processVideoFrame(video, visionConfig.calibration)
if (columnImages.length === 0) return
isInferringRef.current = true
// Use CV-based bead detection
const analyses = analyzeColumns(columnImages)
const { digits, minConfidence } = analysesToDigits(analyses)
try {
// Process video frame into column strips
const columnImages = processVideoFrame(video, visionConfig.calibration)
if (columnImages.length === 0) return
// Convert to number
const value = digitsToNumber(digits)
// Use ML-based digit classification
const results = await classifier.classifyColumns(columnImages)
if (!results || results.digits.length === 0) return
// Push to stability buffer
stability.pushFrame(value, minConfidence)
}, [visionConfig.calibration, stability])
// Extract digits and minimum confidence
const { digits, confidences } = results
const minConfidence = Math.min(...confidences)
// Convert to number
const value = digitsToNumber(digits)
// Push to stability buffer
stability.pushFrame(value, minConfidence)
} finally {
isInferringRef.current = false
}
}, [visionConfig.calibration, stability, classifier])
// Process remote camera frames for detection (only when enabled)
useEffect(() => {
// Skip detection when feature is disabled
// Skip detection when feature is disabled or model not ready
if (!ENABLE_AUTO_DETECTION) return
if (!classifier.isModelLoaded) return
if (!isRemoteCamera || !remoteIsPhoneConnected || !remoteLatestFrame) {
return
@ -267,32 +271,46 @@ export function DockedVisionFeed({ onValueDetected, columnCount = 5 }: DockedVis
if (now - lastInferenceTimeRef.current < INFERENCE_INTERVAL_MS) {
return
}
lastInferenceTimeRef.current = now
const image = remoteImageRef.current
if (!image || !image.complete || image.naturalWidth === 0) {
return
}
// Prevent concurrent inference
if (isInferringRef.current) return
isInferringRef.current = true
lastInferenceTimeRef.current = now
// Phone sends pre-cropped frames in auto mode, so no calibration needed
const columnImages = processImageFrame(image, null, columnCount)
if (columnImages.length === 0) return
if (columnImages.length === 0) {
isInferringRef.current = false
return
}
// Use CV-based bead detection
const analyses = analyzeColumns(columnImages)
const { digits, minConfidence } = analysesToDigits(analyses)
// Use ML-based digit classification (async)
classifier.classifyColumns(columnImages).then((results) => {
isInferringRef.current = false
if (!results || results.digits.length === 0) return
// Convert to number
const value = digitsToNumber(digits)
// Extract digits and minimum confidence
const { digits, confidences } = results
const minConfidence = Math.min(...confidences)
// Push to stability buffer
stability.pushFrame(value, minConfidence)
}, [isRemoteCamera, remoteIsPhoneConnected, remoteLatestFrame, columnCount, stability])
// Convert to number
const value = digitsToNumber(digits)
// Push to stability buffer
stability.pushFrame(value, minConfidence)
})
}, [isRemoteCamera, remoteIsPhoneConnected, remoteLatestFrame, columnCount, stability, classifier])
// Local camera detection loop (only when enabled)
useEffect(() => {
// Skip detection loop when feature is disabled
// Skip detection loop when feature is disabled or model not loaded
if (!ENABLE_AUTO_DETECTION) return
if (!classifier.isModelLoaded) return
if (!visionConfig.enabled || !isLocalCamera || !videoStream || !visionConfig.calibration) {
return
@ -303,6 +321,7 @@ export function DockedVisionFeed({ onValueDetected, columnCount = 5 }: DockedVis
const loop = () => {
if (!running) return
// processLocalFrame is async but we don't await - it handles concurrency internally
processLocalFrame()
animationFrameRef.current = requestAnimationFrame(loop)
}
@ -322,6 +341,7 @@ export function DockedVisionFeed({ onValueDetected, columnCount = 5 }: DockedVis
videoStream,
visionConfig.calibration,
processLocalFrame,
classifier.isModelLoaded,
])
// Handle stable value changes (only when auto-detection is enabled)
@ -553,28 +573,40 @@ export function DockedVisionFeed({ onValueDetected, columnCount = 5 }: DockedVis
backdropFilter: 'blur(4px)',
})}
>
{/* Detected value */}
{/* Model loading or detected value */}
<div className={css({ display: 'flex', alignItems: 'center', gap: 2 })}>
<span
className={css({
fontSize: 'lg',
fontWeight: 'bold',
color: 'white',
fontFamily: 'mono',
})}
>
{detectedValue !== null ? detectedValue : '---'}
</span>
{detectedValue !== null && (
<span className={css({ fontSize: 'xs', color: 'gray.400' })}>
{Math.round(confidence * 100)}%
{classifier.isLoading ? (
<span className={css({ fontSize: 'xs', color: 'yellow.400' })}>
Loading model...
</span>
) : !classifier.isModelLoaded ? (
<span className={css({ fontSize: 'xs', color: 'gray.500' })}>
Model unavailable
</span>
) : (
<>
<span
className={css({
fontSize: 'lg',
fontWeight: 'bold',
color: 'white',
fontFamily: 'mono',
})}
>
{detectedValue !== null ? detectedValue : '---'}
</span>
{detectedValue !== null && (
<span className={css({ fontSize: 'xs', color: 'gray.400' })}>
{Math.round(confidence * 100)}%
</span>
)}
</>
)}
</div>
{/* Stability indicator */}
<div className={css({ display: 'flex', alignItems: 'center', gap: 1 })}>
{stability.consecutiveFrames > 0 && (
{classifier.isModelLoaded && stability.consecutiveFrames > 0 && (
<div className={css({ display: 'flex', gap: 0.5 })}>
{Array.from({ length: 3 }).map((_, i) => (
<div