Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 23 additions & 13 deletions evaluation_function/correction/correction.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,12 @@
from evaluation_function.schemas.params import Params

# Schema imports
from ..schemas import FSA, ValidationError, ErrorCode
from ..schemas import FSA, ValidationError, ErrorCode, ValidationResult
from ..schemas.result import Result, FSAFeedback, StructuralInfo, LanguageComparison

# Validation imports
from ..validation.validation import (
are_isomorphic,
is_valid_fsa,
is_deterministic,
is_complete,
Expand Down Expand Up @@ -125,7 +126,7 @@ def analyze_fsa_correction(
# Step 1: Validate student FSA structure
# -------------------------------------------------------------------------
student_result = is_valid_fsa(student_fsa)
if not student_result.ok():
if not student_result.ok:
summary = (
"Your FSA has a structural problem that needs to be fixed first."
if len(student_result.errors) == 1
Expand All @@ -147,7 +148,7 @@ def analyze_fsa_correction(
# Step 2: Validate expected FSA (should never fail)
# -------------------------------------------------------------------------
expected_result = is_valid_fsa(expected_fsa)
if not expected_result.ok():
if not expected_result.ok:
return Result(
is_correct=False,
feedback="Oops! There's an issue with the expected answer. Please contact your instructor."
Expand All @@ -158,7 +159,7 @@ def analyze_fsa_correction(
# -------------------------------------------------------------------------
if params.expected_type == "DFA":
det_result = is_deterministic(student_fsa)
if not det_result.ok():
if not det_result.ok:
summary = "Your automaton must be deterministic (a DFA)."
return Result(
is_correct=False,
Expand All @@ -177,15 +178,17 @@ def analyze_fsa_correction(
# -------------------------------------------------------------------------
if params.check_completeness:
comp_result = is_complete(student_fsa)
if not comp_result.ok():
if not comp_result.ok:
validation_errors.extend(comp_result.errors)

# -------------------------------------------------------------------------
# Step 5: Optional minimality check
# -------------------------------------------------------------------------
validation_result = None
if params.check_minimality:
min_errors = is_minimal(student_fsa)
validation_errors.extend(min_errors)
validation_result = is_minimal(student_fsa)
if not validation_result.ok:
validation_errors.extend(validation_result.errors)

# -------------------------------------------------------------------------
# Step 6: Structural analysis (for feedback only)
Expand All @@ -198,20 +201,26 @@ def analyze_fsa_correction(
equivalence_result = fsas_accept_same_language(
student_fsa, expected_fsa
)
equivalence_errors = equivalence_result.errors
equivalence_errors.extend(equivalence_result.errors)

# -------------------------------------------------------------------------
# Step 8: Decide correctness based on evaluation mode
# Step 8: Isomorphism
# -------------------------------------------------------------------------
iso_result = are_isomorphic(student_fsa, expected_fsa)
equivalence_errors.extend(iso_result.errors)

# -------------------------------------------------------------------------
# Step 9: Decide correctness based on evaluation mode
# -------------------------------------------------------------------------
if params.evaluation_mode == "strict":
is_correct = not validation_errors and equivalence_result.ok()
is_correct = validation_result is not None and validation_result.ok and equivalence_result.ok and iso_result.ok
elif params.evaluation_mode == "lenient":
is_correct = equivalence_result.ok()
is_correct = validation_result is not None and validation_result.ok and equivalence_result.ok
else: # partial # I don't know what the "partial" mode is meant for; always mark as incorrect?
is_correct = False

# -------------------------------------------------------------------------
# Step 9: Build summary
# Step 10: Build summary
# -------------------------------------------------------------------------
if is_correct:
feedback = (
Expand All @@ -226,9 +235,10 @@ def analyze_fsa_correction(
else "Your FSA has some issues to address."
)
feedback = summary
print(equivalence_errors)

# -------------------------------------------------------------------------
# Step 10: Return result
# Step 11: Return result
# -------------------------------------------------------------------------
return Result(
is_correct=is_correct,
Expand Down
74 changes: 57 additions & 17 deletions evaluation_function/test/test_correction.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,12 @@
from evaluation_function.schemas import ValidationError, ErrorCode
from evaluation_function.schemas.utils import make_fsa
from evaluation_function.schemas.result import Result, FSAFeedback
from evaluation_function.schemas.params import Params
from evaluation_function.correction import analyze_fsa_correction


# =============================================================================
# Fixtures
# Fixtures - DFAs
# =============================================================================

@pytest.fixture
Expand Down Expand Up @@ -72,36 +73,54 @@ def equivalent_dfa():
)


# =============================================================================
# Helper: Default Params
# =============================================================================

@pytest.fixture
def default_params():
    """Provide the baseline ``Params`` used by the correction tests.

    The configuration asks for a DFA, turns on both the completeness and
    minimality checks, evaluates in strict mode, and requests highlighted
    errors with detailed feedback.
    """
    # Collect the settings first so the full configuration reads as one table.
    settings = {
        "expected_type": "DFA",
        "check_completeness": True,
        "check_minimality": True,
        "evaluation_mode": "strict",
        "highlight_errors": True,
        "feedback_verbosity": "detailed",
    }
    return Params(**settings)


# =============================================================================
# Test Main Pipeline - Returns Result
# =============================================================================

class TestAnalyzeFsaCorrection:
"""Test the main analysis pipeline returns Result."""

def test_equivalent_fsas_correct(self, dfa_accepts_a, equivalent_dfa):
result = analyze_fsa_correction(dfa_accepts_a, equivalent_dfa)
def test_equivalent_fsas_correct(self, dfa_accepts_a, equivalent_dfa, default_params):
result = analyze_fsa_correction(dfa_accepts_a, equivalent_dfa, default_params)
print(result)
assert isinstance(result, Result)
assert result.is_correct is True
assert "Correct" in result.feedback

def test_different_fsas_incorrect(self, dfa_accepts_a, dfa_accepts_a_or_b):
result = analyze_fsa_correction(dfa_accepts_a, dfa_accepts_a_or_b)
def test_different_fsas_incorrect(self, dfa_accepts_a, dfa_accepts_a_or_b, default_params):
result = analyze_fsa_correction(dfa_accepts_a, dfa_accepts_a_or_b, default_params)
assert isinstance(result, Result)
assert result.is_correct is False

def test_result_has_fsa_feedback(self, dfa_accepts_a, equivalent_dfa):
result = analyze_fsa_correction(dfa_accepts_a, equivalent_dfa)
def test_result_has_fsa_feedback(self, dfa_accepts_a, equivalent_dfa, default_params):
result = analyze_fsa_correction(dfa_accepts_a, equivalent_dfa, default_params)
assert result.fsa_feedback is not None
assert isinstance(result.fsa_feedback, FSAFeedback)

def test_fsa_feedback_has_structural_info(self, dfa_accepts_a, equivalent_dfa):
result = analyze_fsa_correction(dfa_accepts_a, equivalent_dfa)
def test_fsa_feedback_has_structural_info(self, dfa_accepts_a, equivalent_dfa, default_params):
result = analyze_fsa_correction(dfa_accepts_a, equivalent_dfa, default_params)
assert result.fsa_feedback.structural is not None
assert result.fsa_feedback.structural.num_states == 3

def test_different_fsas_have_errors(self, dfa_accepts_a, dfa_accepts_a_or_b):
result = analyze_fsa_correction(dfa_accepts_a, dfa_accepts_a_or_b)
def test_different_fsas_have_errors(self, dfa_accepts_a, dfa_accepts_a_or_b, default_params):
result = analyze_fsa_correction(dfa_accepts_a, dfa_accepts_a_or_b, default_params)
assert result.fsa_feedback is not None
assert len(result.fsa_feedback.errors) > 0

Expand All @@ -113,36 +132,48 @@ def test_different_fsas_have_errors(self, dfa_accepts_a, dfa_accepts_a_or_b):
class TestInvalidFsas:
"""Test handling of invalid FSAs."""

def test_invalid_initial_state(self):
def test_invalid_initial_state(self, default_params):
invalid = make_fsa(
states=["q0"],
alphabet=["a"],
transitions=[],
initial="invalid",
accept=[]
)
result = analyze_fsa_correction(invalid, invalid)
result = analyze_fsa_correction(invalid, invalid, default_params)
assert result.is_correct is False
assert result.fsa_feedback is not None
assert len(result.fsa_feedback.errors) > 0

def test_invalid_accept_state(self):
def test_invalid_accept_state(self, default_params):
invalid = make_fsa(
states=["q0"],
alphabet=["a"],
transitions=[],
initial="q0",
accept=["invalid"]
)
result = analyze_fsa_correction(invalid, invalid)
result = analyze_fsa_correction(invalid, invalid, default_params)
assert result.is_correct is False


# =============================================================================
# Test Minimality
# =============================================================================

class TestAnalyzeFsaCorrectionMinimality:
"""Test analyze_fsa_correction with minimality checking."""

def test_minimal_fsa_passes(self, dfa_accepts_a, equivalent_dfa):
result = analyze_fsa_correction(dfa_accepts_a, equivalent_dfa, require_minimal=True)
params = Params(
expected_type="DFA",
check_completeness=True,
check_minimality=True,
evaluation_mode="strict",
highlight_errors=True,
feedback_verbosity="detailed"
)
result = analyze_fsa_correction(dfa_accepts_a, equivalent_dfa, params)
assert result.is_correct is True

def test_non_minimal_fsa_fails_when_required(self, equivalent_dfa):
Expand All @@ -162,9 +193,18 @@ def test_non_minimal_fsa_fails_when_required(self, equivalent_dfa):
initial="q0",
accept=["q1"]
)
result = analyze_fsa_correction(non_minimal, equivalent_dfa, require_minimal=True)
params = Params(
expected_type="DFA",
check_completeness=True,
check_minimality=True,
evaluation_mode="strict",
highlight_errors=True,
feedback_verbosity="detailed"
)
result = analyze_fsa_correction(non_minimal, equivalent_dfa, params)
# Should have minimality error
assert result.fsa_feedback is not None
assert any(e.code == ErrorCode.NOT_MINIMAL for e in result.fsa_feedback.errors)


if __name__ == "__main__":
Expand Down
Loading