From de5ca4f5e4af1a5738b0fddc09a7e28cb0c29ba4 Mon Sep 17 00:00:00 2001
From: Hitesh Taneja
Date: Sun, 17 May 2026 17:43:39 +0100
Subject: [PATCH] fix: force AUDIO modality for native-audio models in run_live

---
 src/google/adk/runners.py       | 17 ++++++++
 tests/unittests/test_runners.py | 75 +++++++++++++++++++++++++++++++++
 2 files changed, 92 insertions(+)

diff --git a/src/google/adk/runners.py b/src/google/adk/runners.py
index 850c26bbba..b91f3ee3fc 100644
--- a/src/google/adk/runners.py
+++ b/src/google/adk/runners.py
@@ -1104,6 +1104,23 @@ async def run_live(
     # AUDIO by default.
     if run_config.response_modalities is None:
       run_config.response_modalities = ['AUDIO']
+    # Native-audio models only support AUDIO modality. If TEXT was explicitly
+    # requested, override it and enable transcription so users can still read
+    # the audio output as text.
+    if hasattr(self.agent, 'canonical_model') and self.agent.canonical_model:
+      model_name = self.agent.canonical_model.model or ''
+      if 'native-audio' in model_name.lower():
+        if run_config.response_modalities != ['AUDIO']:
+          logger.warning(
+              'Model %s only supports AUDIO modality. Overriding'
+              ' response_modalities to [AUDIO].',
+              model_name,
+          )
+          run_config.response_modalities = ['AUDIO']
+        if not run_config.output_audio_transcription:
+          run_config.output_audio_transcription = (
+              types.AudioTranscriptionConfig()
+          )
     if session is None and (user_id is None or session_id is None):
       raise ValueError(
           'Either session or user_id and session_id must be provided.'
diff --git a/tests/unittests/test_runners.py b/tests/unittests/test_runners.py
index aa3fc030f3..04c4d8036e 100644
--- a/tests/unittests/test_runners.py
+++ b/tests/unittests/test_runners.py
@@ -19,6 +19,7 @@
 from typing import AsyncGenerator
 from typing import Optional
 from unittest.mock import AsyncMock
+from unittest.mock import MagicMock
 
 from google.adk.agents.base_agent import BaseAgent
 from google.adk.agents.context_cache_config import ContextCacheConfig
@@ -361,6 +362,80 @@ async def test_run_live_auto_create_session():
   assert session is not None
 
 
+class MockNativeAudioLiveAgent(MockLiveAgent):
+  """Mock live agent that reports itself as a native-audio model."""
+
+  @property
+  def canonical_model(self):
+    mock_model = MagicMock()
+    mock_model.model = "gemini-live-2.5-flash-native-audio"
+    return mock_model
+
+
+@pytest.mark.asyncio
+async def test_run_live_native_audio_model_forces_audio_modality():
+  """run_live should override TEXT modality to AUDIO for native-audio models."""
+  from google.adk.agents.live_request_queue import LiveRequestQueue
+
+  session_service = InMemorySessionService()
+  artifact_service = InMemoryArtifactService()
+
+  runner = Runner(
+      app_name="live_app",
+      agent=MockNativeAudioLiveAgent("native_audio_agent"),
+      session_service=session_service,
+      artifact_service=artifact_service,
+      auto_create_session=True,
+  )
+
+  live_queue = LiveRequestQueue()
+  run_config = RunConfig(response_modalities=["TEXT"])
+
+  agen = runner.run_live(
+      user_id="user",
+      session_id="session",
+      live_request_queue=live_queue,
+      run_config=run_config,
+  )
+  await agen.__anext__()
+  await agen.aclose()
+
+  assert run_config.response_modalities == ["AUDIO"]
+  assert run_config.output_audio_transcription is not None
+
+
+@pytest.mark.asyncio
+async def test_run_live_native_audio_model_enables_transcription_when_audio():
+  """run_live should enable transcription for native-audio models when AUDIO modality is already set."""
+  from google.adk.agents.live_request_queue import LiveRequestQueue
+
+  session_service = InMemorySessionService()
+  artifact_service = InMemoryArtifactService()
+
+  runner = Runner(
+      app_name="live_app",
+      agent=MockNativeAudioLiveAgent("native_audio_agent"),
+      session_service=session_service,
+      artifact_service=artifact_service,
+      auto_create_session=True,
+  )
+
+  live_queue = LiveRequestQueue()
+  run_config = RunConfig(response_modalities=["AUDIO"])
+
+  agen = runner.run_live(
+      user_id="user",
+      session_id="session2",
+      live_request_queue=live_queue,
+      run_config=run_config,
+  )
+  await agen.__anext__()
+  await agen.aclose()
+
+  assert run_config.response_modalities == ["AUDIO"]
+  assert run_config.output_audio_transcription is not None
+
+
 @pytest.mark.asyncio
 async def test_run_live_persists_event_callback_modifications():
   """run_live should persist the same event it streams after callback changes."""