@@ -17,10 +17,9 @@
 import base64
 import json
 from typing import Any, Dict, Union, AsyncGenerator, Tuple, List, Optional, Literal
+from typing_extensions import override

-from google.adk.models import LlmRequest, LlmResponse
-from google.adk.models.lite_llm import LiteLlm
-from google.adk.models.cache_metadata import CacheMetadata
+from google.adk.models import LlmRequest, LlmResponse, Gemini
 from google.genai import types
 from pydantic import Field, BaseModel
 from volcenginesdkarkruntime import AsyncArk
@@ -148,24 +147,6 @@ def _schema_to_dict(schema: types.Schema | dict[str, Any]) -> dict:
     return schema_dict


-def build_cache_metadata(response_id: str) -> CacheMetadata:
-    """Create a new CacheMetadata instance for agent response tracking.
-    Args:
-        response_id: Response ID to track
-    Returns:
-        A new CacheMetadata instance with the agent-response mapping
-    """
-    # `adk >= 1.17`
-    cache_metadata = CacheMetadata(
-        cache_name=response_id,
-        expire_time=0,
-        fingerprint="",
-        invocations_used=0,
-        contents_count=0,
-    )
-    return cache_metadata
-
-
 # -----------------------------------------------------------------
 # inputs param transform ------------------------------------------
 def _file_data_to_content_param(
@@ -638,8 +619,7 @@ def ark_response_to_generate_content_response(
     )

     # previous_response_id
-    previous_response_id = raw_response.id
-    llm_response.cache_metadata = build_cache_metadata(previous_response_id)
+    llm_response.interaction_id = raw_response.id

     return llm_response

@@ -662,12 +642,29 @@ async def aresponse(
         return raw_response


-class ArkLlm(LiteLlm):
+class ArkLlm(Gemini):
+    model: str
     llm_client: ArkLlmClient = Field(default_factory=ArkLlmClient)
     _additional_args: Dict[str, Any] = None
+    use_interactions_api: bool = True

     def __init__(self, **kwargs):
+        # adk version check
+        if "previous_interaction_id" not in LlmRequest.model_fields:
+            raise ImportError(
+                "To use the Responses API, "
+                "please upgrade google-adk to 1.21.0 or higher with the command: "
+                "`pip install -U 'google-adk>=1.21.0'`"
+            )
         super().__init__(**kwargs)
+        drop_params = kwargs.pop("drop_params", None)
+        self._additional_args = dict(kwargs)
+        self._additional_args.pop("llm_client", None)
+        self._additional_args.pop("messages", None)
+        self._additional_args.pop("tools", None)
+        self._additional_args.pop("stream", None)
+        if drop_params is not None:
+            self._additional_args["drop_params"] = drop_params

     async def generate_content_async(
         self, llm_request: LlmRequest, stream: bool = False
@@ -694,8 +691,8 @@ async def generate_content_async(
         # ------------------------------------------------------ #
         # get previous_response_id
         previous_response_id = None
-        if llm_request.cache_metadata and llm_request.cache_metadata.cache_name:
-            previous_response_id = llm_request.cache_metadata.cache_name
+        if llm_request.previous_interaction_id:
+            previous_response_id = llm_request.previous_interaction_id
         responses_args = {
             "model": self.model,
             "instructions": instructions,
@@ -723,3 +720,11 @@ async def generate_content_async(
         raw_response = await self.llm_client.aresponse(**responses_args)
         llm_response = ark_response_to_generate_content_response(raw_response)
         yield llm_response
+
+    @classmethod
+    @override
+    def supported_models(cls) -> list[str]:
+        return [
+            # For OpenAI models (e.g., "openai/gpt-4o")
+            r"openai/.*",
+        ]
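
For context, a minimal usage sketch of how this `ArkLlm` wrapper could be wired into an ADK agent after this change (assumes google-adk >= 1.21.0; the import path, agent name, and instruction below are placeholders, not part of this diff):

```python
# Hypothetical wiring of ArkLlm into an ADK agent; names below are illustrative.
from google.adk.agents import LlmAgent

from ark_llm import ArkLlm  # placeholder import path for the module patched above

agent = LlmAgent(
    model=ArkLlm(model="openai/gpt-4o"),  # matches the supported_models() pattern r"openai/.*"
    name="ark_agent",
    instruction="You are a helpful assistant.",
)

# With use_interactions_api=True, each LlmResponse carries interaction_id
# (set from raw_response.id above); the intent is that ADK >= 1.21.0 feeds it
# back as llm_request.previous_interaction_id on the next turn, so the
# Responses API can chain turns server-side instead of replaying full history.
```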