feat(flowcharts): add Problem Trace and unify answer computation (Phases 3-5)
Phase 3 - Mermaid Highlighting:
- Add highlightedNodeId prop to DebugMermaidDiagram for trace hover highlighting
- Cyan dashed border distinguishes trace hover from walker progress (amber)

Phase 4 - Problem Trace Component:
- Create ProblemTrace.tsx displaying step-by-step computation trace
- Shows node title, transforms applied, working problem evolution
- Timeline UI with expand/collapse for each step
- Integrate into WorksheetDebugPanel expanded details

Phase 5 - Unified Answer Computation:
- Update WorksheetDebugPanel to use simulateWalk + extractAnswer
- Update worksheet-generator.ts to use unified computation path
- Update test-case-validator.ts runTestCaseWithFlowchart to use simulateWalk
- All places with full ExecutableFlowchart now use single code path

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -86,6 +86,7 @@ export default function WorkshopPage() {
|
||||
const [executableFlowchart, setExecutableFlowchart] = useState<ExecutableFlowchart | null>(null)
|
||||
const [isExportingPDF, setIsExportingPDF] = useState(false)
|
||||
const [showCreatePdfModal, setShowCreatePdfModal] = useState(false)
|
||||
const [highlightedNodeId, setHighlightedNodeId] = useState<string | null>(null)
|
||||
|
||||
// Examples for worksheet generation
|
||||
const [worksheetExamples, setWorksheetExamples] = useState<GeneratedExample[]>([])
|
||||
@@ -390,11 +391,11 @@ export default function WorkshopPage() {
|
||||
state: 'refining',
|
||||
draftDefinitionJson:
|
||||
data.draftDefinitionJson || JSON.stringify(parsedDefinition),
|
||||
draftMermaidContent: mermaidContent,
|
||||
draftTitle: title,
|
||||
draftDescription: description,
|
||||
draftDifficulty: difficulty,
|
||||
draftEmoji: emoji,
|
||||
draftMermaidContent: mermaidContent ?? null,
|
||||
draftTitle: title ?? null,
|
||||
draftDescription: description ?? null,
|
||||
draftDifficulty: difficulty ?? null,
|
||||
draftEmoji: emoji ?? null,
|
||||
draftNotes: data.draftNotes || JSON.stringify(parsedNotes),
|
||||
currentReasoningText: null, // Clear on completion
|
||||
}
|
||||
@@ -609,6 +610,21 @@ export default function WorkshopPage() {
|
||||
}
|
||||
}, [refinementText, selectedDiagnostics, sessionId])
|
||||
|
||||
// Stores an updated flowchart definition on the session draft
// (called by child tabs, e.g. when test cases are added, edited or removed).
const handleUpdateDefinition = useCallback((updatedDefinition: FlowchartDefinition) => {
  setSession((current) => {
    // Without a session there is nothing to update; stay null
    if (!current) return null

    return {
      ...current,
      draftDefinitionJson: JSON.stringify(updatedDefinition),
    }
  })
}, [])
|
||||
|
||||
// Helper to check if two diagnostics are the same
|
||||
// Must compare code, location, AND message because multiple diagnostics
|
||||
// can have the same code and location (e.g., two unknown refs in same field)
|
||||
@@ -1172,6 +1188,7 @@ export default function WorkshopPage() {
|
||||
<DebugMermaidDiagram
|
||||
mermaidContent={session.draftMermaidContent || ''}
|
||||
currentNodeId=""
|
||||
highlightedNodeId={highlightedNodeId ?? undefined}
|
||||
onRegenerate={handleGenerate}
|
||||
isRegenerating={isGenerating}
|
||||
/>
|
||||
@@ -1238,7 +1255,11 @@ export default function WorkshopPage() {
|
||||
{activeTab === 'structure' && <StructureTab definition={definition} notes={notes} />}
|
||||
{activeTab === 'input' && <InputTab definition={definition} />}
|
||||
{activeTab === 'tests' && (
|
||||
<TestsTab definition={definition} validationReport={testValidationReport} />
|
||||
<TestsTab
|
||||
definition={definition}
|
||||
validationReport={testValidationReport}
|
||||
onUpdateDefinition={handleUpdateDefinition}
|
||||
/>
|
||||
)}
|
||||
{activeTab === 'worksheet' && executableFlowchart && (
|
||||
<div className={vstack({ gap: '4', alignItems: 'stretch' })}>
|
||||
@@ -1269,7 +1290,11 @@ export default function WorkshopPage() {
|
||||
Create PDF Worksheet
|
||||
</button>
|
||||
{/* Debug Panel - shows generated examples with answers */}
|
||||
<WorksheetDebugPanel flowchart={executableFlowchart} problemCount={10} />
|
||||
<WorksheetDebugPanel
|
||||
flowchart={executableFlowchart}
|
||||
problemCount={10}
|
||||
onHoverNode={setHighlightedNodeId}
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
{activeTab === 'worksheet' && !executableFlowchart && (
|
||||
|
||||
@@ -6,8 +6,10 @@ import { css } from '../../../styled-system/css'
|
||||
interface DebugMermaidDiagramProps {
|
||||
/** Raw mermaid content */
|
||||
mermaidContent: string
|
||||
/** Current node ID to highlight */
|
||||
currentNodeId: string
|
||||
/** Current node ID to highlight (amber fill - walker progress) */
|
||||
currentNodeId?: string
|
||||
/** Highlighted node ID for trace hover (cyan border - distinct from current) */
|
||||
highlightedNodeId?: string
|
||||
/** Callback when regeneration is requested (shown when there's a render error) */
|
||||
onRegenerate?: () => void
|
||||
/** Whether regeneration is currently in progress */
|
||||
@@ -23,6 +25,7 @@ interface DebugMermaidDiagramProps {
|
||||
export function DebugMermaidDiagram({
|
||||
mermaidContent,
|
||||
currentNodeId,
|
||||
highlightedNodeId,
|
||||
onRegenerate,
|
||||
isRegenerating,
|
||||
}: DebugMermaidDiagramProps) {
|
||||
@@ -63,12 +66,13 @@ export function DebugMermaidDiagram({
|
||||
.replace(/\\"/g, "'") // Convert \" to '
|
||||
.replace(/\\'/g, "'") // Convert \' to '
|
||||
|
||||
// Add style definition to highlight the current node (only if a node ID is provided)
|
||||
// We append this to the mermaid content
|
||||
const highlightStyle = currentNodeId
|
||||
? `\n style ${currentNodeId} fill:#fbbf24,stroke:#d97706,stroke-width:4px,color:#000`
|
||||
: ''
|
||||
const contentWithHighlight = sanitizedContent + highlightStyle
|
||||
// Add style definitions for current node highlighting (walker progress)
|
||||
let highlightStyles = ''
|
||||
if (currentNodeId) {
|
||||
highlightStyles += `\n style ${currentNodeId} fill:#fbbf24,stroke:#d97706,stroke-width:4px,color:#000`
|
||||
}
|
||||
|
||||
const contentWithHighlight = sanitizedContent + highlightStyles
|
||||
|
||||
// Render the diagram
|
||||
const { svg } = await mermaid.render(id, contentWithHighlight)
|
||||
@@ -81,6 +85,30 @@ export function DebugMermaidDiagram({
|
||||
if (svgElement) {
|
||||
svgElement.style.maxWidth = '100%'
|
||||
svgElement.style.height = 'auto'
|
||||
|
||||
// Apply highlighted node style post-render
|
||||
if (highlightedNodeId && highlightedNodeId !== currentNodeId) {
|
||||
// Dim all nodes
|
||||
const allNodes = svgElement.querySelectorAll('[id^="flowchart-"]')
|
||||
allNodes.forEach((node) => {
|
||||
;(node as SVGElement).style.opacity = '0.85'
|
||||
})
|
||||
|
||||
// Highlight the target node
|
||||
const nodeElement = svgElement.querySelector(`[id*="flowchart-${highlightedNodeId}-"]`)
|
||||
if (nodeElement) {
|
||||
const svgNode = nodeElement as SVGElement
|
||||
svgNode.style.opacity = '1'
|
||||
|
||||
// Add thick cyan border with non-scaling stroke
|
||||
const shape = nodeElement.querySelector('rect, polygon, circle, ellipse, path')
|
||||
if (shape) {
|
||||
shape.setAttribute('stroke', '#06b6d4') // cyan-500
|
||||
shape.setAttribute('stroke-width', '3')
|
||||
shape.setAttribute('vector-effect', 'non-scaling-stroke')
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -106,7 +134,7 @@ export function DebugMermaidDiagram({
|
||||
return () => {
|
||||
mounted = false
|
||||
}
|
||||
}, [mermaidContent, currentNodeId])
|
||||
}, [mermaidContent, currentNodeId, highlightedNodeId])
|
||||
|
||||
if (error) {
|
||||
return (
|
||||
|
||||
392
apps/web/src/components/flowchart/ProblemTrace.tsx
Normal file
392
apps/web/src/components/flowchart/ProblemTrace.tsx
Normal file
@@ -0,0 +1,392 @@
|
||||
'use client'
|
||||
|
||||
import { useState } from 'react'
|
||||
import type { StateSnapshot, TransformExpression, ProblemValue } from '@/lib/flowcharts/schema'
|
||||
import { css } from '../../../styled-system/css'
|
||||
import { vstack, hstack } from '../../../styled-system/patterns'
|
||||
|
||||
/** Props accepted by the ProblemTrace component. */
interface ProblemTraceProps {
  /**
   * Snapshots produced by simulateWalk; one entry per visited node,
   * each holding the state after that node was processed.
   */
  snapshots: StateSnapshot[]

  /**
   * Start with every step expanded instead of collapsed.
   * Defaults to false in the component.
   */
  defaultExpanded?: boolean

  /**
   * Fired when the pointer enters a trace step (with that step's node ID)
   * and when it leaves (with null). Used to drive mermaid highlighting.
   */
  onHoverStep?: (nodeId: string | null) => void
}
|
||||
|
||||
/**
 * Decides whether a trace step has anything to reveal when expanded.
 *
 * A step is expandable when it applied at least one transform, when it is the
 * first step and carries a working problem, or when its working problem
 * differs from the (non-empty) working problem of the previous step.
 *
 * @param snapshot - The step being rendered
 * @param prevSnapshot - The step before it, or null for the first step
 * @returns true if the step should offer an expand/collapse toggle
 */
function hasExpandableContent(
  snapshot: StateSnapshot,
  prevSnapshot: StateSnapshot | null
): boolean {
  const appliedTransforms = snapshot.transforms.length > 0
  if (appliedTransforms) return true

  // Every remaining case needs a working problem on the current step
  const current = snapshot.workingProblem
  if (!current) return false

  // First step: showing the initial working problem is worth expanding
  if (!prevSnapshot) return true

  // Later steps: only a change from a known previous problem counts
  const previous = prevSnapshot.workingProblem
  return Boolean(previous) && previous !== current
}
|
||||
|
||||
/**
 * ProblemTrace - Visualizes the step-by-step computation trace of a problem.
 *
 * Shows:
 * - Each node visited during the walk
 * - Transforms applied at each node (key: expr → result)
 * - Working problem evolution
 * - Hover interaction for mermaid diagram highlighting
 *
 * @param snapshots - Ordered snapshots to render, one timeline step each
 * @param onHoverStep - Called with a step's node ID on mouse enter (skipped for
 *   the synthetic 'initial' step) and with null on mouse leave
 * @param defaultExpanded - When true, every step starts expanded
 */
export function ProblemTrace({ snapshots, onHoverStep, defaultExpanded = false }: ProblemTraceProps) {
  // Expansion is tracked by step index. The lazy initializer avoids rebuilding
  // the initial set on every render; it is only evaluated on mount.
  const [expandedSteps, setExpandedSteps] = useState<Set<number>>(() =>
    defaultExpanded ? new Set(snapshots.map((_, i) => i)) : new Set<number>()
  )

  // Flip a single step between expanded and collapsed (copy-on-write so React sees a new Set)
  const toggleStep = (index: number) => {
    setExpandedSteps((prev) => {
      const next = new Set(prev)
      if (next.has(index)) {
        next.delete(index)
      } else {
        next.add(index)
      }
      return next
    })
  }

  if (snapshots.length === 0) {
    return (
      <div
        data-element="empty-trace"
        className={css({
          padding: '4',
          color: { base: 'gray.500', _dark: 'gray.400' },
          fontSize: 'sm',
          textAlign: 'center',
        })}
      >
        No trace available
      </div>
    )
  }

  return (
    <div
      data-component="problem-trace"
      className={vstack({
        gap: '0',
        alignItems: 'stretch',
        width: '100%',
      })}
    >
      {snapshots.map((snapshot, index) => {
        const isExpanded = expandedSteps.has(index)
        const isInitial = snapshot.nodeId === 'initial'
        const isLast = index === snapshots.length - 1
        const prevSnapshot = index > 0 ? snapshots[index - 1] : null
        const isExpandable = hasExpandableContent(snapshot, prevSnapshot)

        // True when both this step and the previous one have a working problem and they differ
        const workingProblemChanged =
          Boolean(snapshot.workingProblem) &&
          Boolean(prevSnapshot?.workingProblem) &&
          snapshot.workingProblem !== prevSnapshot?.workingProblem

        return (
          <div
            // The index keeps keys unique even if the same node is visited more
            // than once with an identical timestamp (e.g. a loop in the flowchart).
            key={`${index}-${snapshot.nodeId}-${snapshot.timestamp}`}
            data-element="trace-step"
            data-node-id={snapshot.nodeId}
            className={css({
              position: 'relative',
              paddingLeft: '5',
              paddingBottom: isLast ? '0' : '1',
            })}
            onMouseEnter={() => !isInitial && onHoverStep?.(snapshot.nodeId)}
            onMouseLeave={() => onHoverStep?.(null)}
          >
            {/* Vertical line connecting steps */}
            {!isLast && (
              <div
                className={css({
                  position: 'absolute',
                  left: '7px',
                  top: '14px',
                  bottom: '0',
                  width: '2px',
                  backgroundColor: { base: 'gray.200', _dark: 'gray.700' },
                })}
              />
            )}

            {/* Step dot: blue for the initial step, green for the last, gray otherwise */}
            <div
              className={css({
                position: 'absolute',
                left: '2px',
                top: '6px',
                width: '12px',
                height: '12px',
                borderRadius: 'full',
                backgroundColor: isInitial
                  ? { base: 'blue.500', _dark: 'blue.400' }
                  : isLast
                    ? { base: 'green.500', _dark: 'green.400' }
                    : { base: 'gray.300', _dark: 'gray.600' },
                border: '2px solid',
                borderColor: { base: 'white', _dark: 'gray.900' },
              })}
            />

            {/* Step content */}
            <div
              className={css({
                borderRadius: 'md',
                overflow: 'hidden',
                cursor: isExpandable ? 'pointer' : 'default',
                transition: 'all 0.15s',
                _hover: isExpandable
                  ? {
                      backgroundColor: { base: 'gray.50', _dark: 'gray.800/50' },
                    }
                  : {},
              })}
              onClick={() => isExpandable && toggleStep(index)}
            >
              {/* Step header - compact single line */}
              <div
                className={hstack({
                  gap: '2',
                  paddingY: '1',
                  paddingX: '2',
                })}
              >
                {/* Expand/collapse indicator - only show if expandable */}
                {isExpandable ? (
                  <span
                    className={css({
                      fontSize: '10px',
                      color: { base: 'gray.400', _dark: 'gray.500' },
                      transition: 'transform 0.15s',
                      transform: isExpanded ? 'rotate(90deg)' : 'rotate(0deg)',
                      width: '10px',
                      flexShrink: 0,
                    })}
                  >
                    ▶
                  </span>
                ) : (
                  <span className={css({ width: '10px', flexShrink: 0 })} />
                )}

                {/* Node title (falls back to the node ID) */}
                <span
                  className={css({
                    fontSize: 'xs',
                    fontWeight: 'medium',
                    color: { base: 'gray.600', _dark: 'gray.300' },
                    flex: 1,
                    overflow: 'hidden',
                    textOverflow: 'ellipsis',
                    whiteSpace: 'nowrap',
                  })}
                >
                  {snapshot.nodeTitle || snapshot.nodeId}
                </span>

                {/* Transform count badge */}
                {snapshot.transforms.length > 0 && (
                  <span
                    className={css({
                      fontSize: '10px',
                      paddingX: '1.5',
                      paddingY: '0.5',
                      borderRadius: 'sm',
                      backgroundColor: { base: 'blue.100', _dark: 'blue.900/50' },
                      color: { base: 'blue.600', _dark: 'blue.300' },
                      fontWeight: 'medium',
                    })}
                  >
                    {snapshot.transforms.length}
                  </span>
                )}
              </div>

              {/* Expanded content */}
              {isExpanded && isExpandable && (
                <div
                  className={vstack({
                    gap: '2',
                    paddingX: '2',
                    paddingBottom: '2',
                    paddingTop: '1',
                    alignItems: 'stretch',
                  })}
                >
                  {/* Working problem evolution - only show if it changed from previous */}
                  {workingProblemChanged && prevSnapshot && (
                    <div
                      data-element="working-problem-change"
                      className={css({
                        padding: '2',
                        backgroundColor: { base: 'amber.50', _dark: 'amber.900/20' },
                        borderRadius: 'md',
                        fontSize: 'xs',
                        border: '1px solid',
                        borderColor: { base: 'amber.200', _dark: 'amber.800/30' },
                      })}
                    >
                      <div className={hstack({ gap: '2', alignItems: 'center', flexWrap: 'wrap' })}>
                        <span
                          className={css({
                            color: { base: 'gray.500', _dark: 'gray.400' },
                            fontFamily: 'mono',
                          })}
                        >
                          {prevSnapshot.workingProblem}
                        </span>
                        <span className={css({ color: { base: 'amber.500', _dark: 'amber.400' } })}>→</span>
                        <span
                          className={css({
                            fontWeight: 'semibold',
                            color: { base: 'gray.800', _dark: 'gray.100' },
                            fontFamily: 'mono',
                          })}
                        >
                          {snapshot.workingProblem}
                        </span>
                      </div>
                    </div>
                  )}

                  {/* First snapshot - just show initial working problem */}
                  {snapshot.workingProblem && !prevSnapshot && (
                    <div
                      data-element="initial-working-problem"
                      className={css({
                        padding: '2',
                        backgroundColor: { base: 'blue.50', _dark: 'blue.900/20' },
                        borderRadius: 'md',
                        fontSize: 'xs',
                        border: '1px solid',
                        borderColor: { base: 'blue.200', _dark: 'blue.800/30' },
                      })}
                    >
                      <span className={css({ color: { base: 'gray.500', _dark: 'gray.400' } })}>Problem: </span>
                      <span
                        className={css({
                          fontWeight: 'semibold',
                          color: { base: 'gray.800', _dark: 'gray.100' },
                          fontFamily: 'mono',
                        })}
                      >
                        {snapshot.workingProblem}
                      </span>
                    </div>
                  )}

                  {/* Transforms: result is looked up in this step's values by transform key */}
                  {snapshot.transforms.length > 0 && (
                    <div data-element="transforms" className={vstack({ gap: '1', alignItems: 'stretch' })}>
                      {snapshot.transforms.map((transform, tIndex) => (
                        <TransformDisplay
                          key={`${transform.key}-${tIndex}`}
                          transform={transform}
                          result={snapshot.values[transform.key]}
                        />
                      ))}
                    </div>
                  )}
                </div>
              )}
            </div>
          </div>
        )
      })}
    </div>
  )
}
|
||||
|
||||
// =============================================================================
|
||||
// Helper Components
|
||||
// =============================================================================
|
||||
|
||||
/** Props for a single rendered transform row. */
interface TransformDisplayProps {
  /** The transform that was applied (its key and expression are displayed) */
  transform: TransformExpression
  /** The value shown after the arrow as the transform's result */
  result: ProblemValue
}
|
||||
|
||||
/**
 * Renders one transform as a single monospace row: `key = expr → result`.
 *
 * The expression is visually truncated with an ellipsis; its full text is
 * available through the element's title attribute.
 */
function TransformDisplay({ transform, result }: TransformDisplayProps) {
  // NOTE(review): '1.5 2' is a two-value shorthand string - confirm the styling
  // system resolves both parts as spacing tokens.
  const rowClass = css({
    fontFamily: 'mono',
    fontSize: '11px',
    padding: '1.5 2',
    backgroundColor: { base: 'gray.100', _dark: 'gray.800' },
    borderRadius: 'sm',
    display: 'flex',
    flexWrap: 'wrap',
    gap: '1',
    alignItems: 'center',
    border: '1px solid',
    borderColor: { base: 'gray.200', _dark: 'gray.700' },
  })
  const keyClass = css({ color: { base: 'purple.600', _dark: 'purple.400' }, fontWeight: 'semibold' })
  const equalsClass = css({ color: { base: 'gray.400', _dark: 'gray.500' } })
  const expressionClass = css({
    color: { base: 'gray.500', _dark: 'gray.400' },
    maxWidth: '150px',
    overflow: 'hidden',
    textOverflow: 'ellipsis',
    whiteSpace: 'nowrap',
  })
  const arrowClass = css({ color: { base: 'gray.400', _dark: 'gray.500' } })
  const resultClass = css({ color: { base: 'green.600', _dark: 'green.400' }, fontWeight: 'semibold' })

  const { key: transformKey, expr: expression } = transform
  const renderedResult = formatResult(result)

  return (
    <div data-element="transform" className={rowClass}>
      {/* Name of the value being assigned */}
      <span className={keyClass}>{transformKey}</span>

      <span className={equalsClass}>=</span>

      {/* Source expression, ellipsized when wider than the cap */}
      <span className={expressionClass} title={expression}>
        {expression}
      </span>

      <span className={arrowClass}>→</span>

      {/* Computed value */}
      <span className={resultClass}>{renderedResult}</span>
    </div>
  )
}
|
||||
|
||||
/**
 * Format a ProblemValue for display.
 *
 * - null / undefined → "null"
 * - booleans → "true" / "false"
 * - mixed numbers (objects carrying a `denom` property):
 *   - no fractional part → the whole part only (so a zero value prints "0")
 *   - no whole part → "num/denom"
 *   - otherwise → "whole num/denom"
 * - anything else → String(value)
 *
 * @param value - The value to render
 * @returns Human-readable text for the value
 */
function formatResult(value: ProblemValue): string {
  if (value === null || value === undefined) return 'null'
  if (typeof value === 'boolean') return value ? 'true' : 'false'
  if (typeof value === 'object' && 'denom' in value) {
    // Mixed number
    const { whole, num, denom } = value
    // Check the fractional part first: with the whole-part check first,
    // a zero value (whole 0, num 0) was rendered as "0/denom" instead of "0".
    if (num === 0) return String(whole)
    if (whole === 0) return `${num}/${denom}`
    return `${whole} ${num}/${denom}`
  }
  return String(value)
}
|
||||
@@ -17,6 +17,9 @@ interface TestsTabProps {
|
||||
onUpdateDefinition?: (definition: FlowchartDefinition) => void
|
||||
}
|
||||
|
||||
/** The test being edited (its index in the examples array plus the example itself), or null if not editing */
|
||||
type EditingState = { index: number; example: ProblemExample } | null
|
||||
|
||||
/**
|
||||
* Tests tab for the flowchart workshop.
|
||||
* Shows test case results and allows adding new tests.
|
||||
@@ -29,6 +32,7 @@ export function TestsTab({
|
||||
const [localValidationReport, setLocalValidationReport] = useState<ValidationReport | null>(null)
|
||||
const [isRunning, setIsRunning] = useState(false)
|
||||
const [showAddTestForm, setShowAddTestForm] = useState(false)
|
||||
const [editingTest, setEditingTest] = useState<EditingState>(null)
|
||||
|
||||
// Use external report if provided, otherwise compute locally
|
||||
const validationReport = externalReport ?? localValidationReport
|
||||
@@ -92,6 +96,59 @@ export function TestsTab({
|
||||
[definition, onUpdateDefinition]
|
||||
)
|
||||
|
||||
// Handle updating an existing test case.
// Replaces the example at `index` with `example`, pushes the new definition to
// the parent, and closes the edit form. Does nothing when there is no
// definition or no update callback.
const handleUpdateTest = useCallback(
  (index: number, example: ProblemExample) => {
    if (!definition || !onUpdateDefinition) return

    const existingExamples = definition.problemInput.examples ?? []

    // The index can be -1 when the edited result could not be matched back to
    // an example. Assigning to a negative/out-of-range slot would silently
    // drop the edit, so refuse and keep the form open instead.
    if (!Number.isInteger(index) || index < 0 || index >= existingExamples.length) {
      console.warn(`Cannot update test case: index ${index} is out of range`)
      return
    }

    const updatedExamples = existingExamples.map((existing, i) => (i === index ? example : existing))

    const updatedDefinition: FlowchartDefinition = {
      ...definition,
      problemInput: {
        ...definition.problemInput,
        examples: updatedExamples,
      },
    }
    onUpdateDefinition(updatedDefinition)
    setEditingTest(null)
  },
  [definition, onUpdateDefinition]
)
|
||||
|
||||
// Removes the test case at the given position and hands the resulting
// definition to the parent. No-op without a definition or update callback.
const handleDeleteTest = useCallback(
  (index: number) => {
    if (definition && onUpdateDefinition) {
      const currentExamples = definition.problemInput.examples || []
      // Keep every example except the one at the requested position
      const remaining = currentExamples.filter((_example, position) => position !== index)

      const nextProblemInput = { ...definition.problemInput, examples: remaining }
      const nextDefinition: FlowchartDefinition = { ...definition, problemInput: nextProblemInput }

      onUpdateDefinition(nextDefinition)
    }
  },
  [definition, onUpdateDefinition]
)
|
||||
|
||||
// Find the index of a test case in the examples array.
// Tries object identity first so that tests sharing the same name and expected
// answer resolve to the right entry, then falls back to matching on
// name + expectedAnswer. Returns -1 when nothing matches or there is no definition.
const findTestIndex = useCallback(
  (example: ProblemExample): number => {
    const examples = definition?.problemInput.examples ?? []

    const exactIndex = examples.indexOf(example)
    if (exactIndex !== -1) return exactIndex

    return examples.findIndex(
      (ex) => ex.name === example.name && ex.expectedAnswer === example.expectedAnswer
    )
  },
  [definition]
)
|
||||
|
||||
if (!definition) {
|
||||
return (
|
||||
<p className={css({ color: { base: 'gray.500', _dark: 'gray.400' } })}>
|
||||
@@ -184,9 +241,18 @@ export function TestsTab({
|
||||
>
|
||||
Test Results
|
||||
</h4>
|
||||
{validationReport.results.map((result, index) => (
|
||||
<TestResultRow key={index} result={result} />
|
||||
))}
|
||||
{validationReport.results.map((result, index) => {
|
||||
const testIndex = findTestIndex(result.example)
|
||||
return (
|
||||
<TestResultRow
|
||||
key={index}
|
||||
result={result}
|
||||
canEdit={!!onUpdateDefinition}
|
||||
onEdit={() => setEditingTest({ index: testIndex, example: result.example })}
|
||||
onDelete={() => handleDeleteTest(testIndex)}
|
||||
/>
|
||||
)
|
||||
})}
|
||||
</div>
|
||||
)}
|
||||
|
||||
@@ -299,8 +365,18 @@ export function TestsTab({
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Edit Test Form */}
|
||||
{editingTest && definition && (
|
||||
<EditTestForm
|
||||
definition={definition}
|
||||
example={editingTest.example}
|
||||
onSave={(example) => handleUpdateTest(editingTest.index, example)}
|
||||
onCancel={() => setEditingTest(null)}
|
||||
/>
|
||||
)}
|
||||
|
||||
{/* Add Test Form */}
|
||||
{showAddTestForm && definition && (
|
||||
{showAddTestForm && definition && !editingTest && (
|
||||
<AddTestForm
|
||||
definition={definition}
|
||||
onAdd={handleAddTest}
|
||||
@@ -337,7 +413,17 @@ export function TestsTab({
|
||||
/**
|
||||
* Single test result row
|
||||
*/
|
||||
function TestResultRow({ result }: { result: TestResult }) {
|
||||
function TestResultRow({
|
||||
result,
|
||||
canEdit,
|
||||
onEdit,
|
||||
onDelete,
|
||||
}: {
|
||||
result: TestResult
|
||||
canEdit?: boolean
|
||||
onEdit?: () => void
|
||||
onDelete?: () => void
|
||||
}) {
|
||||
const [isExpanded, setIsExpanded] = useState(!result.passed)
|
||||
|
||||
return (
|
||||
@@ -363,7 +449,7 @@ function TestResultRow({ result }: { result: TestResult }) {
|
||||
})}
|
||||
onClick={() => setIsExpanded(!isExpanded)}
|
||||
>
|
||||
<div className={hstack({ gap: '2' })}>
|
||||
<div className={hstack({ gap: '2', flex: 1 })}>
|
||||
<span className={css({ fontSize: 'sm' })}>{result.passed ? '✓' : '✗'}</span>
|
||||
<span
|
||||
className={css({
|
||||
@@ -377,14 +463,68 @@ function TestResultRow({ result }: { result: TestResult }) {
|
||||
{result.example.name}
|
||||
</span>
|
||||
</div>
|
||||
<span
|
||||
className={css({
|
||||
fontSize: 'xs',
|
||||
color: { base: 'gray.400', _dark: 'gray.500' },
|
||||
})}
|
||||
>
|
||||
{isExpanded ? '▼' : '▶'}
|
||||
</span>
|
||||
<div className={hstack({ gap: '1' })}>
|
||||
{canEdit && (
|
||||
<>
|
||||
<button
|
||||
data-action="edit-test"
|
||||
onClick={(e) => {
|
||||
e.stopPropagation()
|
||||
onEdit?.()
|
||||
}}
|
||||
className={css({
|
||||
padding: '1',
|
||||
fontSize: 'xs',
|
||||
color: { base: 'gray.500', _dark: 'gray.400' },
|
||||
backgroundColor: 'transparent',
|
||||
border: 'none',
|
||||
cursor: 'pointer',
|
||||
borderRadius: 'sm',
|
||||
_hover: {
|
||||
color: { base: 'blue.600', _dark: 'blue.400' },
|
||||
backgroundColor: { base: 'blue.50', _dark: 'blue.900/30' },
|
||||
},
|
||||
})}
|
||||
title="Edit test"
|
||||
>
|
||||
✏️
|
||||
</button>
|
||||
<button
|
||||
data-action="delete-test"
|
||||
onClick={(e) => {
|
||||
e.stopPropagation()
|
||||
if (confirm('Delete this test case?')) {
|
||||
onDelete?.()
|
||||
}
|
||||
}}
|
||||
className={css({
|
||||
padding: '1',
|
||||
fontSize: 'xs',
|
||||
color: { base: 'gray.500', _dark: 'gray.400' },
|
||||
backgroundColor: 'transparent',
|
||||
border: 'none',
|
||||
cursor: 'pointer',
|
||||
borderRadius: 'sm',
|
||||
_hover: {
|
||||
color: { base: 'red.600', _dark: 'red.400' },
|
||||
backgroundColor: { base: 'red.50', _dark: 'red.900/30' },
|
||||
},
|
||||
})}
|
||||
title="Delete test"
|
||||
>
|
||||
🗑️
|
||||
</button>
|
||||
</>
|
||||
)}
|
||||
<span
|
||||
className={css({
|
||||
fontSize: 'xs',
|
||||
color: { base: 'gray.400', _dark: 'gray.500' },
|
||||
})}
|
||||
>
|
||||
{isExpanded ? '▼' : '▶'}
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{isExpanded && (
|
||||
@@ -706,3 +846,279 @@ function AddTestForm({
|
||||
</div>
|
||||
)
|
||||
}
|
||||
|
||||
/**
 * Form for editing an existing test case.
 *
 * Holds the test name, expected answer and per-field input values as strings
 * while editing, and converts them back to typed values on save.
 *
 * @param definition - Flowchart definition; its problemInput.fields drive which inputs are shown
 * @param example - The test case being edited (seeds all form fields)
 * @param onSave - Called with the edited example when "Save Changes" is pressed
 * @param onCancel - Called when "Cancel" is pressed
 */
function EditTestForm({
  definition,
  example,
  onSave,
  onCancel,
}: {
  definition: FlowchartDefinition
  example: ProblemExample
  onSave: (example: ProblemExample) => void
  onCancel: () => void
}) {
  const [name, setName] = useState(example.name)
  const [expectedAnswer, setExpectedAnswer] = useState(example.expectedAnswer || '')
  // Seed from the example right away so the first render is not blank
  const [values, setValues] = useState<Record<string, string>>(() =>
    buildEditFormValues(definition, example)
  )

  // The form instance is reused when a different test is picked for editing,
  // so the name and expected answer must follow the example just like the
  // input values do; otherwise they would keep the previous test's text.
  useEffect(() => {
    setName(example.name)
    setExpectedAnswer(example.expectedAnswer || '')
  }, [example])

  // Re-initialize input values whenever the definition or the example changes
  useEffect(() => {
    setValues(buildEditFormValues(definition, example))
  }, [definition, example])

  // Functional update so rapid edits never overwrite each other with a stale copy
  const updateValue = (key: string, value: string) => {
    setValues((prev) => ({ ...prev, [key]: value }))
  }

  const handleSubmit = useCallback(() => {
    if (!name.trim() || !expectedAnswer.trim()) return

    // Convert string values to proper types
    const typedValues: Record<string, ProblemValue> = {}
    for (const field of definition.problemInput.fields) {
      if (field.type === 'mixed-number') {
        typedValues[field.name] = {
          whole: Number(values[`${field.name}Whole`]) || 0,
          num: Number(values[`${field.name}Num`]) || 0,
          // A blank or zero denominator falls back to 1
          denom: Number(values[`${field.name}Denom`]) || 1,
        }
      } else if (field.type === 'integer' || field.type === 'number') {
        typedValues[field.name] = Number(values[field.name]) || 0
      } else {
        typedValues[field.name] = values[field.name] || ''
      }
    }

    onSave({
      name: name.trim(),
      description: example.description,
      values: typedValues,
      expectedAnswer: expectedAnswer.trim(),
    })
  }, [name, expectedAnswer, values, definition, example.description, onSave])

  // Shared styles (kept as literal css() calls)
  const labelClass = css({
    fontSize: 'xs',
    color: { base: 'gray.600', _dark: 'gray.400' },
  })
  const textInputClass = css({
    width: '100%',
    padding: '2',
    borderRadius: 'md',
    border: '1px solid',
    borderColor: { base: 'gray.300', _dark: 'gray.600' },
    backgroundColor: { base: 'white', _dark: 'gray.900' },
    fontSize: 'sm',
  })
  const fieldNameClass = css({ fontSize: 'xs' })
  const wholeInputClass = css({
    width: '60px',
    padding: '1',
    borderRadius: 'sm',
    border: '1px solid',
    borderColor: { base: 'gray.300', _dark: 'gray.600' },
    fontSize: 'sm',
  })
  const fractionInputClass = css({
    width: '50px',
    padding: '1',
    borderRadius: 'sm',
    border: '1px solid',
    borderColor: { base: 'gray.300', _dark: 'gray.600' },
    fontSize: 'sm',
  })
  const fieldInputClass = css({
    width: '100%',
    padding: '1',
    borderRadius: 'sm',
    border: '1px solid',
    borderColor: { base: 'gray.300', _dark: 'gray.600' },
    fontSize: 'sm',
  })

  return (
    <div
      data-element="edit-test-form"
      className={css({
        padding: '3',
        borderRadius: 'md',
        backgroundColor: { base: 'blue.50', _dark: 'blue.900/20' },
        border: '1px solid',
        borderColor: { base: 'blue.200', _dark: 'blue.700' },
      })}
    >
      <h4
        className={css({
          fontWeight: 'medium',
          marginBottom: '3',
          color: { base: 'gray.800', _dark: 'gray.200' },
        })}
      >
        Edit Test Case
      </h4>

      <div className={vstack({ gap: '2', alignItems: 'stretch' })}>
        {/* Test name */}
        <div>
          <label className={labelClass}>Test Name</label>
          <input
            type="text"
            value={name}
            onChange={(e) => setName(e.target.value)}
            className={textInputClass}
          />
        </div>

        {/* Input values */}
        <div>
          <label className={labelClass}>Input Values</label>
          <div
            className={css({ display: 'grid', gridTemplateColumns: 'repeat(2, 1fr)', gap: '2' })}
          >
            {definition.problemInput.fields.map((field) => {
              if (field.type === 'mixed-number') {
                // Mixed numbers are edited as three separate inputs: whole, numerator, denominator
                return (
                  <div key={field.name} className={css({ gridColumn: 'span 2' })}>
                    <span className={fieldNameClass}>{field.label || field.name}:</span>
                    <div className={hstack({ gap: '1' })}>
                      <input
                        type="number"
                        value={values[`${field.name}Whole`] || ''}
                        onChange={(e) => updateValue(`${field.name}Whole`, e.target.value)}
                        placeholder="Whole"
                        className={wholeInputClass}
                      />
                      <input
                        type="number"
                        value={values[`${field.name}Num`] || ''}
                        onChange={(e) => updateValue(`${field.name}Num`, e.target.value)}
                        placeholder="Num"
                        className={fractionInputClass}
                      />
                      <span>/</span>
                      <input
                        type="number"
                        value={values[`${field.name}Denom`] || ''}
                        onChange={(e) => updateValue(`${field.name}Denom`, e.target.value)}
                        placeholder="Denom"
                        className={fractionInputClass}
                      />
                    </div>
                  </div>
                )
              }
              return (
                <div key={field.name}>
                  <span className={fieldNameClass}>{field.label || field.name}:</span>
                  <input
                    type={field.type === 'integer' || field.type === 'number' ? 'number' : 'text'}
                    value={values[field.name] || ''}
                    onChange={(e) => updateValue(field.name, e.target.value)}
                    className={fieldInputClass}
                  />
                </div>
              )
            })}
          </div>
        </div>

        {/* Expected answer */}
        <div>
          <label className={labelClass}>Expected Answer</label>
          <input
            type="text"
            value={expectedAnswer}
            onChange={(e) => setExpectedAnswer(e.target.value)}
            className={textInputClass}
          />
        </div>

        {/* Buttons */}
        <div className={hstack({ gap: '2', justifyContent: 'flex-end' })}>
          <button
            onClick={onCancel}
            className={css({
              padding: '1.5 3',
              borderRadius: 'md',
              backgroundColor: { base: 'gray.200', _dark: 'gray.700' },
              color: { base: 'gray.700', _dark: 'gray.300' },
              border: 'none',
              cursor: 'pointer',
              fontSize: 'sm',
              _hover: {
                backgroundColor: { base: 'gray.300', _dark: 'gray.600' },
              },
            })}
          >
            Cancel
          </button>
          <button
            onClick={handleSubmit}
            // Saving requires both a name and an expected answer
            disabled={!name.trim() || !expectedAnswer.trim()}
            className={css({
              padding: '1.5 3',
              borderRadius: 'md',
              backgroundColor: { base: 'blue.600', _dark: 'blue.500' },
              color: 'white',
              border: 'none',
              cursor: 'pointer',
              fontSize: 'sm',
              _hover: {
                backgroundColor: { base: 'blue.700', _dark: 'blue.600' },
              },
              _disabled: {
                opacity: 0.5,
                cursor: 'not-allowed',
              },
            })}
          >
            Save Changes
          </button>
        </div>
      </div>
    </div>
  )
}

/**
 * Builds the string-valued form state for EditTestForm from an example.
 * Mixed-number fields are split into `<name>Whole`, `<name>Num` and
 * `<name>Denom` entries; every other field is stored under its own name.
 * Missing values become empty strings.
 */
function buildEditFormValues(
  definition: FlowchartDefinition,
  example: ProblemExample
): Record<string, string> {
  const formValues: Record<string, string> = {}
  for (const field of definition.problemInput.fields) {
    if (field.type === 'mixed-number') {
      const val = example.values[field.name] as
        | { whole?: number; num?: number; denom?: number }
        | undefined
      formValues[`${field.name}Whole`] = String(val?.whole ?? '')
      formValues[`${field.name}Num`] = String(val?.num ?? '')
      formValues[`${field.name}Denom`] = String(val?.denom ?? '')
    } else {
      formValues[field.name] = String(example.values[field.name] ?? '')
    }
  }
  return formValues
}
|
||||
|
||||
@@ -5,7 +5,8 @@ import type { ExecutableFlowchart, ProblemValue, MixedNumberValue } from '@/lib/
|
||||
import type { GeneratedExample } from '@/lib/flowcharts/loader'
|
||||
import { generateExamplesAsync } from '@/lib/flowcharts/example-generator-client'
|
||||
import { formatProblemDisplay } from '@/lib/flowcharts/formatting'
|
||||
import { evaluateDisplayAnswer } from '@/lib/flowchart-workshop/test-case-validator'
|
||||
import { simulateWalk, extractAnswer } from '@/lib/flowcharts/loader'
|
||||
import { ProblemTrace } from './ProblemTrace'
|
||||
import { css } from '../../../styled-system/css'
|
||||
import { vstack, hstack } from '../../../styled-system/patterns'
|
||||
|
||||
@@ -14,6 +15,8 @@ interface WorksheetDebugPanelProps {
|
||||
flowchart: ExecutableFlowchart
|
||||
/** Number of problems to generate (default: 10) */
|
||||
problemCount?: number
|
||||
/** Callback when hovering over a trace node (for mermaid highlighting) */
|
||||
onHoverNode?: (nodeId: string | null) => void
|
||||
}
|
||||
|
||||
/** Difficulty tier type */
|
||||
@@ -23,7 +26,7 @@ type DifficultyTier = 'easy' | 'medium' | 'hard'
|
||||
* Debug panel for testing worksheet generation.
|
||||
* Shows generated problems with their computed answers, raw values, and difficulty tiers.
|
||||
*/
|
||||
export function WorksheetDebugPanel({ flowchart, problemCount = 10 }: WorksheetDebugPanelProps) {
|
||||
export function WorksheetDebugPanel({ flowchart, problemCount = 10, onHoverNode }: WorksheetDebugPanelProps) {
|
||||
const [examples, setExamples] = useState<GeneratedExample[]>([])
|
||||
const [isLoading, setIsLoading] = useState(true)
|
||||
const [error, setError] = useState<string | null>(null)
|
||||
@@ -118,6 +121,28 @@ export function WorksheetDebugPanel({ flowchart, problemCount = 10 }: WorksheetD
|
||||
}
|
||||
}
|
||||
|
||||
// Compute simulations and answers for all examples (unified computation path)
|
||||
// Per-example simulation results, index-aligned with `examples` (built via examples.map).
// Memoized: recomputed only when `examples` or `flowchart` changes (see dependency array).
const computedExamples = useMemo(() => {
|
||||
return examples.map((example) => {
|
||||
// Failures are caught per example, so a failing example still yields an entry
// (with state: null and an error message) instead of aborting the whole map.
try {
|
||||
const terminalState = simulateWalk(flowchart, example.values)
|
||||
const { display } = extractAnswer(flowchart, terminalState)
|
||||
return {
|
||||
state: terminalState,
|
||||
// `||` rather than `??`: an empty-string display.text also falls back to '?'
answerDisplay: display.text || '?',
|
||||
error: null,
|
||||
}
|
||||
} catch (err) {
|
||||
console.error('Failed to compute answer for example:', err)
|
||||
return {
|
||||
state: null,
|
||||
answerDisplay: '?',
|
||||
// Thrown values that are not Error instances get a generic message
error: err instanceof Error ? err.message : 'Unknown error',
|
||||
}
|
||||
}
|
||||
})
|
||||
}, [examples, flowchart])
|
||||
|
||||
if (isLoading) {
|
||||
return (
|
||||
<div className={css({ padding: '4', textAlign: 'center' })}>
|
||||
@@ -288,10 +313,8 @@ export function WorksheetDebugPanel({ flowchart, problemCount = 10 }: WorksheetD
|
||||
const tierColor = getTierColor(tier)
|
||||
const isExpanded = expandedItems.has(index)
|
||||
const problemDisplay = formatProblemDisplay(flowchart, example.values)
|
||||
const { answer: answerDisplay } = evaluateDisplayAnswer(
|
||||
flowchart.definition,
|
||||
example.values
|
||||
)
|
||||
const computed = computedExamples[index]
|
||||
const answerDisplay = computed?.answerDisplay ?? '?'
|
||||
|
||||
return (
|
||||
<div
|
||||
@@ -514,6 +537,29 @@ export function WorksheetDebugPanel({ flowchart, problemCount = 10 }: WorksheetD
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Computation Trace */}
|
||||
{computed?.state?.snapshots && computed.state.snapshots.length > 0 && (
|
||||
<div data-element="computation-trace-section">
|
||||
<h4
|
||||
className={css({
|
||||
fontSize: 'xs',
|
||||
fontWeight: 'semibold',
|
||||
color: { base: 'gray.600', _dark: 'gray.400' },
|
||||
marginBottom: '2',
|
||||
textTransform: 'uppercase',
|
||||
letterSpacing: 'wide',
|
||||
})}
|
||||
>
|
||||
Computation Trace
|
||||
</h4>
|
||||
<ProblemTrace
|
||||
snapshots={computed.state.snapshots}
|
||||
defaultExpanded={false}
|
||||
onHoverStep={onHoverNode}
|
||||
/>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
||||
@@ -1319,6 +1319,7 @@ Every example in \`problemInput.examples\` MUST include an \`expectedAnswer\` fi
|
||||
- Include at least one test case for each major path through the flowchart
|
||||
- Cover edge cases where the answer format might change (e.g., improper fractions becoming whole numbers, "5" vs "5/1")
|
||||
- Ensure test cases exercise all branches of conditional logic in \`display.answer\`
|
||||
- **Handle degenerate computed values**: When intermediate computations can reach boundary conditions (ratio = 1, difference = 0, denominator = 1, etc.), the \`display.answer\` expression must handle these. A multi-part display may need to collapse to fewer parts—test cases should cover inputs that trigger these boundary conditions.
|
||||
|
||||
**Example**:
|
||||
\`\`\`json
|
||||
@@ -1532,6 +1533,7 @@ Your task is to modify the flowchart according to the teacher's request while:
|
||||
- Missing whole number extraction from improper fractions
|
||||
- Division by zero not handled
|
||||
- String concatenation order issues
|
||||
- Computed values reaching boundary conditions (e.g., a computed denominator = 1, ratio = 1, or difference = 0) that should collapse the display to a simpler form
|
||||
|
||||
` +
|
||||
getCriticalRules() +
|
||||
|
||||
@@ -16,7 +16,7 @@ import type {
|
||||
import { evaluate, type EvalContext } from '../flowcharts/evaluator'
|
||||
import { analyzeFlowchart, type FlowchartPath } from '../flowcharts/path-analysis'
|
||||
import type { ExecutableFlowchart } from '../flowcharts/schema'
|
||||
import { loadFlowchart } from '../flowcharts/loader'
|
||||
import { loadFlowchart, simulateWalk, extractAnswer } from '../flowcharts/loader'
|
||||
|
||||
// =============================================================================
|
||||
// Types
|
||||
@@ -225,7 +225,7 @@ export function runTestCase(definition: FlowchartDefinition, example: ProblemExa
|
||||
|
||||
/**
|
||||
* Run a single test case using an ExecutableFlowchart.
|
||||
* Uses evaluateDisplayAnswer - the canonical answer computation function.
|
||||
* Uses simulateWalk + extractAnswer for unified answer computation.
|
||||
*/
|
||||
export function runTestCaseWithFlowchart(
|
||||
flowchart: ExecutableFlowchart,
|
||||
@@ -241,30 +241,32 @@ export function runTestCaseWithFlowchart(
|
||||
}
|
||||
}
|
||||
|
||||
// Use evaluateDisplayAnswer - handles normalization internally
|
||||
const { answer, error } = evaluateDisplayAnswer(flowchart.definition, example.values)
|
||||
// Use simulateWalk + extractAnswer for unified computation
|
||||
try {
|
||||
const terminalState = simulateWalk(flowchart, example.values)
|
||||
const { display: answerDisplay } = extractAnswer(flowchart, terminalState)
|
||||
const answer = answerDisplay.text || null
|
||||
|
||||
if (error) {
|
||||
// Compare after trimming whitespace
|
||||
const normalizedActual = answer?.trim() ?? ''
|
||||
const normalizedExpected = example.expectedAnswer.trim()
|
||||
const passed = normalizedActual === normalizedExpected
|
||||
|
||||
return {
|
||||
example,
|
||||
actualAnswer: answer,
|
||||
expectedAnswer: example.expectedAnswer,
|
||||
passed,
|
||||
}
|
||||
} catch (err) {
|
||||
return {
|
||||
example,
|
||||
actualAnswer: null,
|
||||
expectedAnswer: example.expectedAnswer,
|
||||
passed: false,
|
||||
error,
|
||||
error: err instanceof Error ? err.message : 'Evaluation failed',
|
||||
}
|
||||
}
|
||||
|
||||
// Compare after trimming whitespace
|
||||
const normalizedActual = answer?.trim() ?? ''
|
||||
const normalizedExpected = example.expectedAnswer.trim()
|
||||
const passed = normalizedActual === normalizedExpected
|
||||
|
||||
return {
|
||||
example,
|
||||
actualAnswer: answer,
|
||||
expectedAnswer: example.expectedAnswer,
|
||||
passed,
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -307,8 +309,8 @@ export function validateTestCases(definition: FlowchartDefinition): ValidationRe
|
||||
|
||||
/**
|
||||
* Validate test cases with full coverage analysis.
|
||||
* Uses evaluateDisplayAnswer for validation - the same function
|
||||
* that worksheet generation uses to compute answers.
|
||||
* Uses simulateWalk + extractAnswer for validation - the unified
|
||||
* computation path used by worksheet generation.
|
||||
*/
|
||||
export async function validateTestCasesWithCoverage(
|
||||
definition: FlowchartDefinition,
|
||||
|
||||
@@ -16,7 +16,7 @@
|
||||
|
||||
"display": {
|
||||
"problem": "(leftWhole > 0 ? (leftWhole + ' ' + leftNum + '/' + leftDenom) : (leftNum + '/' + leftDenom)) + ' ' + op + ' ' + (rightWhole > 0 ? (rightWhole + ' ' + rightNum + '/' + rightDenom) : (rightNum + '/' + rightDenom))",
|
||||
"answer": "resultWhole > 0 ? (simplifiedNum > 0 ? (resultWhole + ' ' + simplifiedNum + '/' + simplifiedDenom) : resultWhole) : (simplifiedNum + '/' + simplifiedDenom)"
|
||||
"answer": "simplifiedDenom == 1 ? (resultWhole + simplifiedNum) : (resultWhole > 0 ? (simplifiedNum > 0 ? (resultWhole + ' ' + simplifiedNum + '/' + simplifiedDenom) : resultWhole) : (simplifiedNum + '/' + simplifiedDenom))"
|
||||
},
|
||||
|
||||
"constraints": {
|
||||
|
||||
@@ -357,21 +357,6 @@ export function applyTransforms(
|
||||
if (!node) return state
|
||||
|
||||
const transforms = node.definition.transform || []
|
||||
if (transforms.length === 0) {
|
||||
// No transforms, but still add a snapshot for the node
|
||||
const snapshot: StateSnapshot = {
|
||||
nodeId,
|
||||
nodeTitle: node.content?.title || nodeId,
|
||||
values: { ...state.values },
|
||||
transforms: [],
|
||||
workingProblem: state.workingProblem,
|
||||
timestamp: Date.now(),
|
||||
}
|
||||
return {
|
||||
...state,
|
||||
snapshots: [...state.snapshots, snapshot],
|
||||
}
|
||||
}
|
||||
|
||||
// Apply transforms in order
|
||||
const newValues = { ...state.values }
|
||||
@@ -391,13 +376,42 @@ export function applyTransforms(
|
||||
}
|
||||
}
|
||||
|
||||
// Create snapshot after applying transforms
|
||||
// Check for workingProblemUpdate on this node
|
||||
// Start from the current working problem and history; they are only replaced
// below if this node defines a workingProblemUpdate and it evaluates successfully.
let newWorkingProblem = state.workingProblem
|
||||
let newWorkingProblemHistory = state.workingProblemHistory
|
||||
const def = node.definition
|
||||
|
||||
// Only 'checkpoint' and 'instruction' node types are consulted for
// workingProblemUpdate; for any other type this stays undefined.
let workingProblemUpdate: { result: string; label: string } | undefined
|
||||
if (def.type === 'checkpoint') {
|
||||
workingProblemUpdate = (def as CheckpointNode).workingProblemUpdate
|
||||
} else if (def.type === 'instruction') {
|
||||
workingProblemUpdate = (def as InstructionNode).workingProblemUpdate
|
||||
}
|
||||
|
||||
if (workingProblemUpdate) {
|
||||
try {
|
||||
// Evaluated against newValues rather than state.values.
// NOTE(review): presumably newValues already holds this node's transform results — confirm.
const context = createContextFromValues(state.problem, newValues, state.userState)
|
||||
// String(...) coerces whatever the result expression evaluates to into text
newWorkingProblem = String(evaluate(workingProblemUpdate.result, context))
|
||||
// Build a new array instead of pushing, leaving state.workingProblemHistory unmodified
newWorkingProblemHistory = [
|
||||
...state.workingProblemHistory,
|
||||
{
|
||||
value: newWorkingProblem,
|
||||
label: workingProblemUpdate.label,
|
||||
nodeId,
|
||||
},
|
||||
]
|
||||
} catch (error) {
|
||||
// Logged only: if evaluate() threw, nothing above was reassigned, so the
// previous working problem and history carry through unchanged.
console.error(`Working problem update error at ${nodeId}:`, error)
|
||||
}
|
||||
}
|
||||
|
||||
// Create snapshot after applying transforms (with updated working problem)
|
||||
const snapshot: StateSnapshot = {
|
||||
nodeId,
|
||||
nodeTitle: node.content?.title || nodeId,
|
||||
values: { ...newValues },
|
||||
transforms: appliedTransforms,
|
||||
workingProblem: state.workingProblem,
|
||||
workingProblem: newWorkingProblem,
|
||||
timestamp: Date.now(),
|
||||
}
|
||||
|
||||
@@ -406,6 +420,8 @@ export function applyTransforms(
|
||||
values: newValues,
|
||||
computed: { ...state.computed, ...newValues }, // Keep computed in sync for backwards compat
|
||||
hasError,
|
||||
workingProblem: newWorkingProblem,
|
||||
workingProblemHistory: newWorkingProblemHistory,
|
||||
snapshots: [...state.snapshots, snapshot],
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,7 +12,7 @@ import * as fs from 'fs/promises'
|
||||
import * as path from 'path'
|
||||
import * as os from 'os'
|
||||
import { getFlowchartByIdAsync } from './definitions'
|
||||
import { loadFlowchart } from './loader'
|
||||
import { loadFlowchart, simulateWalk, extractAnswer } from './loader'
|
||||
import {
|
||||
generateDiverseExamples,
|
||||
type GeneratedExample,
|
||||
@@ -20,7 +20,6 @@ import {
|
||||
} from './example-generator'
|
||||
import { formatProblemDisplay } from './formatting'
|
||||
import type { ExecutableFlowchart, ProblemValue } from './schema'
|
||||
import { evaluateDisplayAnswer } from '../flowchart-workshop/test-case-validator'
|
||||
|
||||
// =============================================================================
|
||||
// Types
|
||||
@@ -172,13 +171,19 @@ function exampleToProblem(
|
||||
): WorksheetProblem {
|
||||
const display = formatProblemDisplay(flowchart, example.values)
|
||||
|
||||
// Use evaluateDisplayAnswer to compute the answer using the flowchart's display.answer
|
||||
const { answer: computedAnswer } = evaluateDisplayAnswer(flowchart.definition, example.values)
|
||||
const answer = computedAnswer ?? '?'
|
||||
|
||||
// Convert plain text answer to Typst format
|
||||
// For fractions (e.g., "3/4" or "2 1/2"), convert to Typst math mode
|
||||
const typstAnswer = convertToTypstAnswer(answer)
|
||||
// Use simulateWalk + extractAnswer for unified answer computation
|
||||
// '?' is the placeholder for both forms until (and unless) computation succeeds
let answer = '?'
|
||||
let typstAnswer = '?'
|
||||
try {
|
||||
const terminalState = simulateWalk(flowchart, example.values)
|
||||
const { display: answerDisplay } = extractAnswer(flowchart, terminalState)
|
||||
// `||` rather than `??`: an empty-string text also falls back to '?'
answer = answerDisplay.text || '?'
|
||||
// Use typst template if provided, otherwise convert from text
|
||||
typstAnswer = answerDisplay.typst || convertToTypstAnswer(answer)
|
||||
} catch (err) {
|
||||
console.error('Failed to compute answer via simulateWalk:', err)
|
||||
// `answer` holds whatever was assigned before the failure ('?' if the walk itself threw).
// NOTE(review): if convertToTypstAnswer was what threw inside the try, this call
// may throw again with the same input — confirm whether that needs guarding.
typstAnswer = convertToTypstAnswer(answer)
|
||||
}
|
||||
|
||||
return {
|
||||
values: example.values,
|
||||
|
||||
Reference in New Issue
Block a user