diff --git a/examples/tutorials/10_async/10_temporal/100_gemini_litellm/.dockerignore b/examples/tutorials/10_async/10_temporal/100_gemini_litellm/.dockerignore
new file mode 100644
index 00000000..c4948947
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/100_gemini_litellm/.dockerignore
@@ -0,0 +1,43 @@
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Environments
+.env*
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# IDE
+.idea/
+.vscode/
+*.swp
+*.swo
+
+# Git
+.git
+.gitignore
+
+# Misc
+.DS_Store
diff --git a/examples/tutorials/10_async/10_temporal/100_gemini_litellm/Dockerfile b/examples/tutorials/10_async/10_temporal/100_gemini_litellm/Dockerfile
new file mode 100644
index 00000000..b1b52a9a
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/100_gemini_litellm/Dockerfile
@@ -0,0 +1,54 @@
+# syntax=docker/dockerfile:1.3
+FROM python:3.12-slim
+COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    htop \
+    vim \
+    curl \
+    tar \
+    python3-dev \
+    postgresql-client \
+    build-essential \
+    libpq-dev \
+    gcc \
+    cmake \
+    netcat-openbsd \
+    nodejs \
+    npm \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install tctl (Temporal CLI)
+RUN curl -L https://github.com/temporalio/tctl/releases/download/v1.18.1/tctl_1.18.1_linux_arm64.tar.gz -o /tmp/tctl.tar.gz && \
+    tar -xzf /tmp/tctl.tar.gz -C /usr/local/bin && \
+    chmod +x /usr/local/bin/tctl && \
+    rm /tmp/tctl.tar.gz
+
+RUN uv pip install --system --upgrade pip setuptools wheel
+
+ENV UV_HTTP_TIMEOUT=1000
+
+# Copy pyproject.toml and README.md to install dependencies
+COPY 10_async/10_temporal/100_gemini_litellm/pyproject.toml /app/100_gemini_litellm/pyproject.toml
+COPY 10_async/10_temporal/100_gemini_litellm/README.md /app/100_gemini_litellm/README.md
+
+WORKDIR /app/100_gemini_litellm
+
+# Copy the project code
+COPY 10_async/10_temporal/100_gemini_litellm/project /app/100_gemini_litellm/project
+
+# Install the required Python packages
+RUN uv pip install --system .
+
+WORKDIR /app/100_gemini_litellm
+
+ENV PYTHONPATH=/app
+ENV AGENT_NAME=at100-gemini-litellm
+
+# Run the ACP server using uvicorn
+CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"]
+
+# When we deploy the worker, we will replace the CMD with the following
+# CMD ["python", "-m", "run_worker"]
diff --git a/examples/tutorials/10_async/10_temporal/100_gemini_litellm/README.md b/examples/tutorials/10_async/10_temporal/100_gemini_litellm/README.md
new file mode 100644
index 00000000..b566fe2b
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/100_gemini_litellm/README.md
@@ -0,0 +1,130 @@
+# [Temporal] Using Alternative Models with LiteLLM (Gemini)
+
+**Part of the [OpenAI SDK + Temporal integration series](../README.md)**
+
+## What You'll Learn
+
+This tutorial demonstrates how to use Google's Gemini models (or any other LLM provider) with the OpenAI Agents SDK through LiteLLM. LiteLLM provides a unified interface, so you can swap models without changing your agent code structure.
+
+**Key insight:** You can use the same OpenAI Agents SDK patterns with any LLM provider supported by LiteLLM - Gemini, Anthropic Claude, Mistral, and many more.
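+
+To make that concrete, here is a minimal sketch of the swap (the `MODEL_NAME` environment variable is purely illustrative - the tutorial code itself hard-codes the Gemini model string):
+
+```python
+import os
+
+from agents import Agent
+from agents.extensions.models.litellm_model import LitellmModel
+
+# Swap providers by changing one string - the agent code stays the same
+model_name = os.getenv("MODEL_NAME", "gemini/gemini-2.0-flash")
+
+agent = Agent(
+    name="Assistant",
+    instructions="You are a helpful assistant.",
+    model=LitellmModel(model=model_name),
+)
+```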
+
+## Prerequisites
+
+- Development environment set up (see [main repo README](https://github.com/scaleapi/scale-agentex))
+- Backend services running: `make dev` from repository root (includes Temporal)
+- Temporal UI available at http://localhost:8233
+- **Google Gemini API key** (see setup below)
+- Understanding of OpenAI Agents SDK basics (see [060_open_ai_agents_sdk_hello_world](../060_open_ai_agents_sdk_hello_world/))
+
+## Setup
+
+### 1. Get a Gemini API Key
+
+1. Go to [Google AI Studio](https://aistudio.google.com/apikey)
+2. Create a new API key
+3. Copy the key for the next step
+
+### 2. Configure the API Key
+
+Add to your environment or `manifest.yaml`:
+
+**Option A: Environment variable**
+```bash
+export GEMINI_API_KEY="your-gemini-api-key-here"
+```
+
+**Option B: In manifest.yaml**
+```yaml
+agent:
+  env:
+    GEMINI_API_KEY: "your-gemini-api-key-here"
+```
+
+### 3. Install LiteLLM Dependency
+
+The `pyproject.toml` already includes `litellm>=1.52.0`. When you run the agent, dependencies are installed automatically.
+
+## Quick Start
+
+```bash
+cd examples/tutorials/10_async/10_temporal/100_gemini_litellm
+uv run agentex agents run --manifest manifest.yaml
+```
+
+**Monitor:** Open Temporal UI at http://localhost:8233 to see workflow execution.
+
+## Key Code Changes
+
+The main difference from OpenAI examples is using `LitellmModel`:
+
+```python
+from agents.extensions.models.litellm_model import LitellmModel
+
+# Create a LiteLLM model pointing to Gemini
+gemini_model = LitellmModel(model="gemini/gemini-2.0-flash")
+
+agent = Agent(
+    name="Gemini Assistant",
+    instructions="You are a helpful assistant powered by Gemini.",
+    model=gemini_model,  # Use the LiteLLM model instead of default
+)
+
+# Run works exactly the same way
+result = await Runner.run(agent, user_messages)
+```
+
+## Supported Models
+
+LiteLLM supports many providers. Just change the model string:
+
+| Provider | Model String Example |
+|----------|---------------------|
+| Google Gemini | `gemini/gemini-2.0-flash`, `gemini/gemini-1.5-pro` |
+| Anthropic | `anthropic/claude-3-sonnet-20240229` |
+| Mistral | `mistral/mistral-large-latest` |
+| Cohere | `cohere/command-r-plus` |
+| AWS Bedrock | `bedrock/anthropic.claude-3-sonnet` |
+
+See [LiteLLM Providers](https://docs.litellm.ai/docs/providers) for the full list.
+
+## Why LiteLLM?
+
+**Model Flexibility:** Switch between providers without code changes - just update the model string.
+
+**Unified Interface:** Same OpenAI Agents SDK patterns work with any provider.
+
+**Cost Optimization:** Easily compare costs across providers by switching models.
+
+**Fallback Support:** LiteLLM supports automatic fallbacks if a provider is unavailable.
+
+## Architecture Notes
+
+The Temporal integration remains identical:
+- Workflows are durable and survive restarts
+- LLM calls are wrapped as activities automatically
+- Full observability in Temporal UI
+- Automatic retries on failures
+
+The only change is the model provider - everything else works the same.
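+
+Because every model call ultimately goes through LiteLLM, you can sanity-check your API key and model string with LiteLLM directly before running the full workflow. A minimal standalone sketch (assuming `litellm` is installed; the prompt is arbitrary):
+
+```python
+import os
+
+import litellm
+
+# Fail fast if the key is missing - the agent would hit the same error later
+assert os.getenv("GEMINI_API_KEY"), "GEMINI_API_KEY is not set"
+
+# LiteLLM exposes an OpenAI-style completion API for every provider
+response = litellm.completion(
+    model="gemini/gemini-2.0-flash",
+    messages=[{"role": "user", "content": "Reply with one short sentence."}],
+)
+print(response.choices[0].message.content)
+```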
+
+## When to Use
+
+- Want to use non-OpenAI models with OpenAI Agents SDK
+- Need to compare model performance across providers
+- Building multi-model systems with fallbacks
+- Cost optimization across different providers
+- Regulatory requirements for specific model providers
+
+## Troubleshooting
+
+**"GEMINI_API_KEY environment variable is not set"**
+- Ensure you've exported the API key or added it to manifest.yaml
+
+**"Model not found" errors**
+- Check the model string format matches LiteLLM's expected format
+- See [LiteLLM Providers](https://docs.litellm.ai/docs/providers) for correct model names
+
+**Rate limiting errors**
+- Gemini has different rate limits than OpenAI
+- Consider adding retry logic or using LiteLLM's built-in retry support
+
+**Previous:** [090_claude_agents_sdk_mvp](../090_claude_agents_sdk_mvp/) - Claude SDK integration
diff --git a/examples/tutorials/10_async/10_temporal/100_gemini_litellm/project/__init__.py b/examples/tutorials/10_async/10_temporal/100_gemini_litellm/project/__init__.py
new file mode 100644
index 00000000..8fca5e6e
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/100_gemini_litellm/project/__init__.py
@@ -0,0 +1 @@
+# Gemini LiteLLM Tutorial
diff --git a/examples/tutorials/10_async/10_temporal/100_gemini_litellm/project/acp.py b/examples/tutorials/10_async/10_temporal/100_gemini_litellm/project/acp.py
new file mode 100644
index 00000000..9d2afdc3
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/100_gemini_litellm/project/acp.py
@@ -0,0 +1,60 @@
+import os
+from datetime import timedelta
+
+from temporalio.contrib.openai_agents import OpenAIAgentsPlugin, ModelActivityParameters
+from agents.extensions.models.litellm_provider import LitellmProvider
+
+# === DEBUG SETUP (AgentEx CLI Debug Support) ===
+if os.getenv("AGENTEX_DEBUG_ENABLED") == "true":
+    import debugpy
+    debug_port = int(os.getenv("AGENTEX_DEBUG_PORT", "5679"))
+    debugpy.configure(subProcess=False)
+    debugpy.listen(debug_port)
+    if os.getenv("AGENTEX_DEBUG_WAIT_FOR_ATTACH", "false").lower() == "true":
+        debugpy.wait_for_client()
+# === END DEBUG SETUP ===
+
+from agentex.lib.types.fastacp import TemporalACPConfig
+from agentex.lib.sdk.fastacp.fastacp import FastACP
+from agentex.lib.core.temporal.plugins.openai_agents.interceptors.context_interceptor import ContextInterceptor
+
+context_interceptor = ContextInterceptor()
+
+# Create the ACP server
+# We use LitellmProvider instead of TemporalStreamingModelProvider
+# to enable using Gemini and other models through LiteLLM
+acp = FastACP.create(
+    acp_type="async",
+    config=TemporalACPConfig(
+        # When deployed to the cluster, the Temporal address will automatically be set to the cluster address
+        # For local development, we set the address manually to talk to the local Temporal service set up via docker compose
+        #
+        # We use the OpenAI Agents SDK plugin because Temporal has built-in support for it,
+        # handling serialization and activity wrapping automatically. LitellmProvider lets us
+        # route to different model providers (like Gemini) while keeping all that infrastructure.
+        type="temporal",
+        temporal_address=os.getenv("TEMPORAL_ADDRESS", "localhost:7233"),
+        plugins=[OpenAIAgentsPlugin(
+            model_params=ModelActivityParameters(
+                start_to_close_timeout=timedelta(days=1)
+            ),
+            model_provider=LitellmProvider(),
+        )],
+        interceptors=[context_interceptor]
+    )
+)
+
+
+# Notice that we don't need to register any handlers when we use type="temporal"
+# If you look at the code in agentex.sdk.fastacp.impl.temporal_acp
+# you can see that these handlers are automatically registered when the ACP is created
+
+# @acp.on_task_create
+# This will be handled by the method in your workflow that is decorated with @workflow.run
+
+# @acp.on_task_event_send
+# This will be handled by the method in your workflow that is decorated with @workflow.signal(name=SignalName.RECEIVE_EVENT)
+
+# @acp.on_task_cancel
+# This does not need to be handled by your workflow.
+# It is automatically handled by the temporal client, which cancels the workflow directly
diff --git a/examples/tutorials/10_async/10_temporal/100_gemini_litellm/project/run_worker.py b/examples/tutorials/10_async/10_temporal/100_gemini_litellm/project/run_worker.py
new file mode 100644
index 00000000..7d9ac651
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/100_gemini_litellm/project/run_worker.py
@@ -0,0 +1,62 @@
+import asyncio
+from datetime import timedelta
+
+from temporalio.contrib.openai_agents import OpenAIAgentsPlugin, ModelActivityParameters
+from agents.extensions.models.litellm_provider import LitellmProvider
+
+from project.workflow import At100GeminiLitellmWorkflow
+from agentex.lib.utils.debug import setup_debug_if_enabled
+from agentex.lib.utils.logging import make_logger
+from agentex.lib.environment_variables import EnvironmentVariables
+from agentex.lib.core.temporal.activities import get_all_activities
+from agentex.lib.core.temporal.workers.worker import AgentexWorker
+from agentex.lib.core.temporal.plugins.openai_agents.interceptors.context_interceptor import ContextInterceptor
+
+environment_variables = EnvironmentVariables.refresh()
+
+logger = make_logger(__name__)
+
+
+async def main():
+    # Set up debug mode if enabled
+    setup_debug_if_enabled()
+
+    task_queue_name = environment_variables.WORKFLOW_TASK_QUEUE
+    if task_queue_name is None:
+        raise ValueError("WORKFLOW_TASK_QUEUE is not set")
+
+    # Add activities to the worker
+    all_activities = get_all_activities() + []  # add your own activities here
+
+    # ============================================================================
+    # LITELLM SETUP: Interceptor + LitellmProvider
+    # ============================================================================
+    # The ContextInterceptor threads task_id through activity headers using
+    # Temporal's interceptor pattern. This enables runtime context without
+    # forking the Temporal plugin.
+    #
+    # We use LitellmProvider instead of TemporalStreamingModelProvider to
+    # enable routing to Gemini and other models through LiteLLM.
+    context_interceptor = ContextInterceptor()
+
+    # Create a worker with automatic tracing
+    # IMPORTANT: We use the STANDARD temporalio.contrib.openai_agents.OpenAIAgentsPlugin
+    # but with LitellmProvider to handle model routing to Gemini.
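+    #
+    # This worker-side plugin setup intentionally matches the one in acp.py,
+    # so model calls are wrapped as activities consistently on both the client
+    # and the worker - if you change one, keep the other in sync.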
+    worker = AgentexWorker(
+        task_queue=task_queue_name,
+        plugins=[OpenAIAgentsPlugin(
+            model_params=ModelActivityParameters(
+                start_to_close_timeout=timedelta(days=1)
+            ),
+            model_provider=LitellmProvider(),
+        )],
+        interceptors=[context_interceptor]
+    )
+
+    await worker.run(
+        activities=all_activities,
+        workflow=At100GeminiLitellmWorkflow,
+    )
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/examples/tutorials/10_async/10_temporal/100_gemini_litellm/project/workflow.py b/examples/tutorials/10_async/10_temporal/100_gemini_litellm/project/workflow.py
new file mode 100644
index 00000000..249bdaa5
--- /dev/null
+++ b/examples/tutorials/10_async/10_temporal/100_gemini_litellm/project/workflow.py
@@ -0,0 +1,234 @@
+"""
+Gemini + LiteLLM + Temporal Integration Tutorial
+
+This tutorial demonstrates how to use Google's Gemini models through LiteLLM
+with the OpenAI Agents SDK and Temporal workflows. It shows how to:
+
+1. Use LiteLLM to route requests to Gemini instead of OpenAI
+2. Maintain the same durable workflow patterns with a different model provider
+3. Leverage the OpenAI Agents SDK interface while using non-OpenAI models
+
+KEY CONCEPTS DEMONSTRATED:
+- LiteLLM model provider for multi-model support
+- Gemini model integration with an OpenAI-compatible interface
+- Temporal workflow durability with alternative LLM providers
+- Model-agnostic agent patterns
+
+This builds on the OpenAI Agents SDK tutorials, showing how to swap models easily.
+"""
+
+import os
+import json
+from typing import Any, Dict, List
+
+from agents import Agent, Runner
+from temporalio import workflow
+
+from agentex.lib import adk
+from agentex.lib.types.acp import SendEventParams, CreateTaskParams
+from agentex.lib.types.tracing import SGPTracingProcessorConfig
+from agentex.lib.utils.logging import make_logger
+from agentex.types.text_content import TextContent
+from agentex.lib.utils.model_utils import BaseModel
+from agentex.lib.environment_variables import EnvironmentVariables
+from agentex.lib.core.temporal.types.workflow import SignalName
+from agentex.lib.core.temporal.workflows.workflow import BaseWorkflow
+from agentex.lib.core.tracing.tracing_processor_manager import (
+    add_tracing_processor_config,
+)
+
+# Configure tracing processor (optional - only if you have SGP credentials)
+add_tracing_processor_config(
+    SGPTracingProcessorConfig(
+        sgp_api_key=os.environ.get("SGP_API_KEY", ""),
+        sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""),
+    )
+)
+
+environment_variables = EnvironmentVariables.refresh()
+
+if environment_variables.WORKFLOW_NAME is None:
+    raise ValueError("Environment variable WORKFLOW_NAME is not set")
+
+if environment_variables.AGENT_NAME is None:
+    raise ValueError("Environment variable AGENT_NAME is not set")
+
+# Note: GEMINI_API_KEY should be set in your environment.
+# LiteLLM will use it automatically when routing to Gemini models.
+
+logger = make_logger(__name__)
+
+
+class StateModel(BaseModel):
+    """
+    State model for preserving conversation history across turns.
+
+    This allows the agent to maintain context throughout the conversation,
+    making it possible to reference previous messages and build on the discussion.
+    """
+
+    input_list: List[Dict[str, Any]]
+    turn_number: int
+
+
+@workflow.defn(name=environment_variables.WORKFLOW_NAME)
+class At100GeminiLitellmWorkflow(BaseWorkflow):
+    """
+    Gemini + LiteLLM Temporal Workflow
+
+    This workflow demonstrates using Google's Gemini models through LiteLLM
+    with the OpenAI Agents SDK.
+    The key insight is that LiteLLM provides a unified interface, allowing
+    you to swap models without changing your agent code structure.
+
+    KEY FEATURES:
+    - Use Gemini models with the OpenAI Agents SDK interface
+    - Same durable workflow patterns as the OpenAI tutorials
+    - Model-agnostic agent development
+    - Full observability through the Temporal dashboard
+    """
+
+    def __init__(self):
+        super().__init__(display_name=environment_variables.AGENT_NAME)
+        self._complete_task = False
+        self._state: StateModel | None = None
+        self._task_id = None
+        self._trace_id = None
+        self._parent_span_id = None
+
+    @workflow.signal(name=SignalName.RECEIVE_EVENT)
+    async def on_task_event_send(self, params: SendEventParams) -> None:
+        """
+        Handle incoming user messages and respond using Gemini via LiteLLM.
+
+        This signal handler demonstrates using alternative model providers:
+        1. Receive the user message through a Temporal signal
+        2. Echo the message back to the UI for visibility
+        3. Create an agent with a Gemini model string routed through LitellmProvider
+        4. Return the agent's response to the user
+
+        LITELLM INTEGRATION:
+        - The "gemini/..." model string is routed to Gemini by the LitellmProvider
+          configured in acp.py and run_worker.py
+        - The agent interface remains identical to the OpenAI examples
+        - Temporal durability works the same way regardless of model provider
+        """
+        logger.info(f"Received task message instruction: {params}")
+
+        if self._state is None:
+            raise ValueError("State is not initialized")
+
+        # Increment turn number for tracing
+        self._state.turn_number += 1
+
+        self._task_id = params.task.id
+        self._trace_id = params.task.id
+
+        # Add the user message to conversation history
+        self._state.input_list.append({"role": "user", "content": params.event.content.content})
+
+        # ============================================================================
+        # STEP 1: Echo User Message
+        # ============================================================================
+        await adk.messages.create(task_id=params.task.id, content=params.event.content)
+
+        # ============================================================================
+        # STEP 2: Wrap execution in tracing span
+        # ============================================================================
+        async with adk.tracing.span(
+            trace_id=params.task.id,
+            name=f"Turn {self._state.turn_number}",
+            input=self._state.model_dump(),
+        ) as span:
+            self._parent_span_id = span.id if span else None
+
+            # ============================================================================
+            # STEP 3: Create Agent with Gemini via LiteLLM
+            # ============================================================================
+            # The key difference from the OpenAI examples is the model specification.
+            # LiteLLM uses a "provider/model" format:
+            # - "gemini/gemini-2.0-flash" for Gemini 2.0 Flash
+            # - "gemini/gemini-1.5-pro" for Gemini 1.5 Pro
+            # - See https://docs.litellm.ai/docs/providers/gemini for more options
+            #
+            # You can also use other providers:
+            # - "anthropic/claude-3-sonnet-20240229" for Claude
+            # - "mistral/mistral-large-latest" for Mistral
+            # - And many more!
+            #
+            # The LitellmProvider configured in acp.py and run_worker.py handles
+            # routing the model string to the appropriate provider.
+
+            agent = Agent(
+                name="Gemini Assistant",
+                instructions="You are a helpful assistant powered by Google's Gemini model. "
+                "You respond concisely and clearly to user questions. "
" + "When appropriate, mention that you're powered by Gemini via LiteLLM.", + model="gemini/gemini-2.0-flash", + ) + + # ============================================================================ + # STEP 4: Run Agent with Temporal Durability + # ============================================================================ + # The Runner.run() call works exactly the same as with OpenAI. + # LiteLLM handles routing the request to Gemini transparently. + # Temporal still provides durability and automatic retries. + + result = await Runner.run(agent, self._state.input_list) + + # Update the state with the assistant's response for the next turn + if hasattr(result, "messages") and result.messages: + for msg in result.messages: + if msg.get("role") == "assistant" and msg not in self._state.input_list: + self._state.input_list.append(msg) + + # Set span output for tracing + span.output = self._state.model_dump() + + # Send the response to the user + await adk.messages.create( + task_id=params.task.id, + content=TextContent(author="agent", content=result.final_output) + ) + + @workflow.run + async def on_task_create(self, params: CreateTaskParams) -> str: + """ + Temporal Workflow Entry Point - Long-Running Agent Conversation + + This method runs when the workflow starts and keeps the agent conversation alive. + The pattern is identical to other tutorials - only the model provider changes. + """ + logger.info(f"Received task create params: {params}") + + # Initialize the conversation state + self._state = StateModel( + input_list=[], + turn_number=0, + ) + + # Send welcome message + await adk.messages.create( + task_id=params.task.id, + content=TextContent( + author="agent", + content=f"Hello! I'm your assistant powered by Google's Gemini model via LiteLLM!\n\n" + f"This demonstrates how to use alternative model providers with the OpenAI Agents SDK " + f"and Temporal workflows. The code structure is nearly identical to OpenAI examples - " + f"only the model specification changes.\n\n" + f"Task created with params:\n{json.dumps(params.params, indent=2)}\n\n" + f"Send me a message and I'll respond using Gemini!", + ), + ) + + # Wait for completion signal + await workflow.wait_condition( + lambda: self._complete_task, + timeout=None, + ) + return "Agent conversation completed" + + @workflow.signal + async def complete_task_signal(self) -> None: + """Signal to gracefully complete the agent conversation workflow""" + logger.info("Received signal to complete the agent conversation") + self._complete_task = True diff --git a/examples/tutorials/10_async/10_temporal/100_gemini_litellm/pyproject.toml b/examples/tutorials/10_async/10_temporal/100_gemini_litellm/pyproject.toml new file mode 100644 index 00000000..9f0098e0 --- /dev/null +++ b/examples/tutorials/10_async/10_temporal/100_gemini_litellm/pyproject.toml @@ -0,0 +1,32 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "at100_gemini_litellm" +version = "0.1.0" +description = "An AgentEx agent using Gemini via LiteLLM" +requires-python = ">=3.12" +dependencies = [ + "agentex-sdk>=0.6.0", + "openai-agents>=0.4.2", + "temporalio>=1.18.2", + "scale-gp", + "litellm>=1.52.0", +] + +[project.optional-dependencies] +dev = [ + "debugpy>=1.8.15", +] + +[tool.hatch.build.targets.wheel] +packages = ["project"] + +[tool.black] +line-length = 88 +target-version = ['py312'] + +[tool.isort] +profile = "black" +line_length = 88