diff --git a/src/google/adk/evaluation/final_response_match_v2.py b/src/google/adk/evaluation/final_response_match_v2.py index 713b421e3d..9417fffb19 100644 --- a/src/google/adk/evaluation/final_response_match_v2.py +++ b/src/google/adk/evaluation/final_response_match_v2.py @@ -228,6 +228,14 @@ def aggregate_invocation_results( continue num_evaluated += 1 num_valid += result.score + + if num_evaluated == 0: + return EvaluationResult( + overall_score=None, + overall_eval_status=EvalStatus.NOT_EVALUATED, + per_invocation_results=per_invocation_results, + ) + overall_score = num_valid / num_evaluated return EvaluationResult( overall_score=overall_score, diff --git a/tests/unittests/evaluation/test_final_response_match_v2.py b/tests/unittests/evaluation/test_final_response_match_v2.py index ce44901ab5..57927339b3 100644 --- a/tests/unittests/evaluation/test_final_response_match_v2.py +++ b/tests/unittests/evaluation/test_final_response_match_v2.py @@ -486,3 +486,34 @@ def test_aggregate_invocation_results(): # Only 4 / 8 invocations are evaluated, and 2 / 4 are valid. assert aggregated_result.overall_score == 0.5 assert aggregated_result.overall_eval_status == EvalStatus.PASSED + + +def test_aggregate_invocation_results_none_evaluated(): + evaluator = _create_test_evaluator_gemini(threshold=0.5) + + actual_invocation, expected_invocation = _create_test_invocations( + "candidate text", "reference text" + ) + + per_invocation_results = [ + PerInvocationResult( + actual_invocation=actual_invocation, + expected_invocation=expected_invocation, + score=None, + eval_status=EvalStatus.NOT_EVALUATED, + ), + PerInvocationResult( + actual_invocation=actual_invocation, + expected_invocation=expected_invocation, + score=1.0, + eval_status=EvalStatus.NOT_EVALUATED, + ), + ] + + aggregated_result = evaluator.aggregate_invocation_results( + per_invocation_results + ) + + assert aggregated_result.overall_score is None + assert aggregated_result.overall_eval_status == EvalStatus.NOT_EVALUATED + assert aggregated_result.per_invocation_results == per_invocation_results