.dockerignore
@@ -0,0 +1,43 @@
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# IDE
.idea/
.vscode/
*.swp
*.swo

# Git
.git
.gitignore

# Misc
.DS_Store
Dockerfile
@@ -0,0 +1,54 @@
# syntax=docker/dockerfile:1.3
FROM python:3.12-slim
COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/

# Install system dependencies
RUN apt-get update && apt-get install -y \
    htop \
    vim \
    curl \
    tar \
    python3-dev \
    postgresql-client \
    build-essential \
    libpq-dev \
    gcc \
    cmake \
    netcat-openbsd \
    nodejs \
    npm \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Install tctl (Temporal CLI)
RUN curl -L https://github.com/temporalio/tctl/releases/download/v1.18.1/tctl_1.18.1_linux_arm64.tar.gz -o /tmp/tctl.tar.gz && \
    tar -xzf /tmp/tctl.tar.gz -C /usr/local/bin && \
    chmod +x /usr/local/bin/tctl && \
    rm /tmp/tctl.tar.gz

RUN uv pip install --system --upgrade pip setuptools wheel

ENV UV_HTTP_TIMEOUT=1000

# Copy pyproject.toml and README.md to install dependencies
COPY 10_async/10_temporal/100_gemini_litellm/pyproject.toml /app/100_gemini_litellm/pyproject.toml
COPY 10_async/10_temporal/100_gemini_litellm/README.md /app/100_gemini_litellm/README.md

WORKDIR /app/100_gemini_litellm

# Copy the project code
COPY 10_async/10_temporal/100_gemini_litellm/project /app/100_gemini_litellm/project

# Install the required Python packages
RUN uv pip install --system .

WORKDIR /app/100_gemini_litellm

ENV PYTHONPATH=/app
ENV AGENT_NAME=at100-gemini-litellm

# Run the ACP server using uvicorn
CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"]

# When we deploy the worker, we will replace the CMD with the following
# CMD ["python", "-m", "run_worker"]
130 changes: 130 additions & 0 deletions examples/tutorials/10_async/10_temporal/100_gemini_litellm/README.md
@@ -0,0 +1,130 @@
# [Temporal] Using Alternative Models with LiteLLM (Gemini)

**Part of the [OpenAI SDK + Temporal integration series](../README.md)**

## What You'll Learn

This tutorial demonstrates how to use Google's Gemini models (or any other LLM provider) with the OpenAI Agents SDK through LiteLLM. The key insight is that LiteLLM provides a unified interface, allowing you to swap models without changing your agent code structure.

**Key insight:** You can use the same OpenAI Agents SDK patterns with any LLM provider supported by LiteLLM - Gemini, Anthropic Claude, Mistral, and many more.

## Prerequisites
- Development environment set up (see [main repo README](https://github.com/scaleapi/scale-agentex))
- Backend services running: `make dev` from repository root (includes Temporal)
- Temporal UI available at http://localhost:8233
- **Google Gemini API key** (see setup below)
- Understanding of OpenAI Agents SDK basics (see [060_open_ai_agents_sdk_hello_world](../060_open_ai_agents_sdk_hello_world/))

## Setup

### 1. Get a Gemini API Key

1. Go to [Google AI Studio](https://aistudio.google.com/apikey)
2. Create a new API key
3. Copy the key for the next step

### 2. Configure the API Key

Add to your environment or `manifest.yaml`:

**Option A: Environment variable**
```bash
export GEMINI_API_KEY="your-gemini-api-key-here"
```

**Option B: In manifest.yaml**
```yaml
agent:
  env:
    GEMINI_API_KEY: "your-gemini-api-key-here"
```
### 3. Install LiteLLM Dependency
The `pyproject.toml` already includes `litellm>=1.52.0`. When you run the agent, dependencies are installed automatically.
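
To confirm the dependency resolved, a quick check from the project environment (standard library only; `1.52.0` is the minimum pin from `pyproject.toml`):

```python
from importlib.metadata import version

# Should print 1.52.0 or newer if the dependency was installed
print(version("litellm"))
```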

## Quick Start

```bash
cd examples/tutorials/10_async/10_temporal/100_gemini_litellm
uv run agentex agents run --manifest manifest.yaml
```

**Monitor:** Open Temporal UI at http://localhost:8233 to see workflow execution.

## Key Code Changes

The main difference from OpenAI examples is using `LitellmModel`:

```python
from agents import Agent, Runner
from agents.extensions.models.litellm_model import LitellmModel

# Create a LiteLLM model pointing to Gemini
gemini_model = LitellmModel(model="gemini/gemini-2.0-flash")

agent = Agent(
    name="Gemini Assistant",
    instructions="You are a helpful assistant powered by Gemini.",
    model=gemini_model,  # use the LiteLLM model instead of the default
)

# Run works exactly the same way
result = await Runner.run(agent, user_messages)
```

## Supported Models

LiteLLM supports many providers. Just change the model string:

| Provider | Model String Example |
|----------|---------------------|
| Google Gemini | `gemini/gemini-2.0-flash`, `gemini/gemini-1.5-pro` |
| Anthropic | `anthropic/claude-3-sonnet-20240229` |
| Mistral | `mistral/mistral-large-latest` |
| Cohere | `cohere/command-r-plus` |
| AWS Bedrock | `bedrock/anthropic.claude-3-sonnet` |

See [LiteLLM Providers](https://docs.litellm.ai/docs/providers) for the full list.
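
Because the provider is encoded in the model string, switching providers is a one-line change. A minimal sketch using model strings from the table above:

```python
from agents.extensions.models.litellm_model import LitellmModel

# Same agent code either way; only the model string changes
gemini = LitellmModel(model="gemini/gemini-2.0-flash")
claude = LitellmModel(model="anthropic/claude-3-sonnet-20240229")
mistral = LitellmModel(model="mistral/mistral-large-latest")
```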

## Why LiteLLM?

**Model Flexibility:** Switch between providers without code changes - just update the model string.

**Unified Interface:** Same OpenAI Agents SDK patterns work with any provider.

**Cost Optimization:** Easily compare costs across providers by switching models.

**Fallback Support:** LiteLLM supports automatic fallbacks if a provider is unavailable.
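
This tutorial does not configure fallbacks, but as a sketch of what LiteLLM's `Router` offers (the aliases and model choices below are illustrative, not part of this tutorial):

```python
from litellm import Router

# Route requests to Gemini first, falling back to Claude on failure
router = Router(
    model_list=[
        {"model_name": "primary", "litellm_params": {"model": "gemini/gemini-2.0-flash"}},
        {"model_name": "backup", "litellm_params": {"model": "anthropic/claude-3-sonnet-20240229"}},
    ],
    fallbacks=[{"primary": ["backup"]}],
)

response = await router.acompletion(
    model="primary",
    messages=[{"role": "user", "content": "Hello!"}],
)
```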

## Architecture Notes

The Temporal integration remains identical:
- Workflows are durable and survive restarts
- LLM calls are wrapped as activities automatically
- Full observability in Temporal UI
- Automatic retries on failures

The only change is the model provider - everything else works the same.
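
For intuition, here is a hypothetical sketch of what a workflow using this setup can look like (the tutorial's real workflow is `At100GeminiLitellmWorkflow` in `project/workflow.py`; the class and method names below are illustrative):

```python
from temporalio import workflow
from agents import Agent, Runner


@workflow.defn
class GeminiSketchWorkflow:  # illustrative name, not the tutorial's workflow
    @workflow.run
    async def run(self, user_message: str) -> str:
        agent = Agent(
            name="Gemini Assistant",
            instructions="You are a helpful assistant powered by Gemini.",
            # With LitellmProvider configured on the plugin, a plain model
            # string resolves to a LiteLLM-backed model inside the activity.
            model="gemini/gemini-2.0-flash",
        )
        # The OpenAIAgentsPlugin runs this model call as a Temporal activity,
        # so it is retried on failure and visible in the Temporal UI.
        result = await Runner.run(agent, user_message)
        return result.final_output
```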

## When to Use

- Want to use non-OpenAI models with OpenAI Agents SDK
- Need to compare model performance across providers
- Building multi-model systems with fallbacks
- Cost optimization across different providers
- Regulatory requirements for specific model providers

## Troubleshooting

**"GEMINI_API_KEY environment variable is not set"**
- Ensure you've exported the API key or added it to manifest.yaml

**"Model not found" errors**
- Check the model string format matches LiteLLM's expected format
- See [LiteLLM Providers](https://docs.litellm.ai/docs/providers) for correct model names

**Rate limiting errors**
- Gemini has different rate limits than OpenAI
- Consider adding retry logic or using LiteLLM's built-in retry support
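
If you call LiteLLM directly (outside the Agents SDK, shown only for illustration), it accepts a `num_retries` argument:

```python
import litellm

response = litellm.completion(
    model="gemini/gemini-2.0-flash",
    messages=[{"role": "user", "content": "Hello!"}],
    num_retries=2,  # retry transient failures such as rate limits
)
```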

**Previous:** [090_claude_agents_sdk_mvp](../090_claude_agents_sdk_mvp/) - Claude SDK integration
@@ -0,0 +1 @@
# Gemini LiteLLM Tutorial
project/acp.py
@@ -0,0 +1,60 @@
import os
from datetime import timedelta

from temporalio.contrib.openai_agents import OpenAIAgentsPlugin, ModelActivityParameters
from agents.extensions.models.litellm_provider import LitellmProvider

# === DEBUG SETUP (AgentEx CLI Debug Support) ===
if os.getenv("AGENTEX_DEBUG_ENABLED") == "true":
    import debugpy

    debug_port = int(os.getenv("AGENTEX_DEBUG_PORT", "5679"))
    debugpy.configure(subProcess=False)
    debugpy.listen(debug_port)
    if os.getenv("AGENTEX_DEBUG_WAIT_FOR_ATTACH", "false").lower() == "true":
        debugpy.wait_for_client()
# === END DEBUG SETUP ===

from agentex.lib.types.fastacp import TemporalACPConfig
from agentex.lib.sdk.fastacp.fastacp import FastACP
from agentex.lib.core.temporal.plugins.openai_agents.interceptors.context_interceptor import ContextInterceptor

context_interceptor = ContextInterceptor()

# Create the ACP server
# We use LitellmProvider instead of TemporalStreamingModelProvider
# to enable using Gemini and other models through LiteLLM
acp = FastACP.create(
    acp_type="async",
    config=TemporalACPConfig(
        # When deployed to the cluster, the Temporal address will automatically be set to the cluster address.
        # For local development, we set the address manually to talk to the local Temporal service set up via docker compose.
        #
        # We use the OpenAI Agents SDK plugin because Temporal has built-in support for it,
        # handling serialization and activity wrapping automatically. LitellmProvider lets us
        # route to different model providers (like Gemini) while keeping all that infrastructure.
        type="temporal",
        temporal_address=os.getenv("TEMPORAL_ADDRESS", "localhost:7233"),
        plugins=[
            OpenAIAgentsPlugin(
                model_params=ModelActivityParameters(
                    start_to_close_timeout=timedelta(days=1)
                ),
                model_provider=LitellmProvider(),
            )
        ],
        interceptors=[context_interceptor],
    ),
)


# Notice that we don't need to register any handlers when we use type="temporal".
# If you look at the code in agentex.sdk.fastacp.impl.temporal_acp,
# you can see that these handlers are registered automatically when the ACP is created.

# @acp.on_task_create
# This will be handled by the method in your workflow that is decorated with @workflow.run

# @acp.on_task_event_send
# This will be handled by the method in your workflow that is decorated with @workflow.signal(name=SignalName.RECEIVE_MESSAGE)

# @acp.on_task_cancel
# This does not need to be handled by your workflow.
# It is automatically handled by the temporal client which cancels the workflow directly
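
For orientation, a hypothetical sketch of the workflow-side methods these comments refer to (the real implementation is `At100GeminiLitellmWorkflow` in `project/workflow.py`; the names, signatures, and signal string below are assumptions for illustration):

```python
from temporalio import workflow


@workflow.defn
class HandlerSketchWorkflow:  # illustrative only
    @workflow.run
    async def run(self, params: dict) -> None:
        # Entry point: invoked when the ACP receives a task-create request
        ...

    @workflow.signal(name="receive_message")  # assumed value of SignalName.RECEIVE_MESSAGE
    async def receive_message(self, event: dict) -> None:
        # Invoked when the ACP receives a task event
        ...
```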
run_worker.py
@@ -0,0 +1,62 @@
import asyncio
from datetime import timedelta

from temporalio.contrib.openai_agents import OpenAIAgentsPlugin, ModelActivityParameters
from agents.extensions.models.litellm_provider import LitellmProvider

from project.workflow import At100GeminiLitellmWorkflow
from agentex.lib.utils.debug import setup_debug_if_enabled
from agentex.lib.utils.logging import make_logger
from agentex.lib.environment_variables import EnvironmentVariables
from agentex.lib.core.temporal.activities import get_all_activities
from agentex.lib.core.temporal.workers.worker import AgentexWorker
from agentex.lib.core.temporal.plugins.openai_agents.interceptors.context_interceptor import ContextInterceptor

environment_variables = EnvironmentVariables.refresh()

logger = make_logger(__name__)


async def main():
    # Setup debug mode if enabled
    setup_debug_if_enabled()

    task_queue_name = environment_variables.WORKFLOW_TASK_QUEUE
    if task_queue_name is None:
        raise ValueError("WORKFLOW_TASK_QUEUE is not set")

    # Add activities to the worker
    all_activities = get_all_activities() + []  # add your own activities here

    # ============================================================================
    # LITELLM SETUP: Interceptor + LitellmProvider
    # ============================================================================
    # The ContextInterceptor threads task_id through activity headers using
    # Temporal's interceptor pattern. This enables runtime context without
    # forking the Temporal plugin.
    #
    # We use LitellmProvider instead of TemporalStreamingModelProvider to
    # enable routing to Gemini and other models through LiteLLM.
    context_interceptor = ContextInterceptor()

    # Create a worker with automatic tracing.
    # IMPORTANT: We use the STANDARD temporalio.contrib.openai_agents.OpenAIAgentsPlugin
    # but with LitellmProvider to handle model routing to Gemini.
    worker = AgentexWorker(
        task_queue=task_queue_name,
        plugins=[
            OpenAIAgentsPlugin(
                model_params=ModelActivityParameters(
                    start_to_close_timeout=timedelta(days=1)
                ),
                model_provider=LitellmProvider(),
            )
        ],
        interceptors=[context_interceptor],
    )

    await worker.run(
        activities=all_activities,
        workflow=At100GeminiLitellmWorkflow,
    )

if __name__ == "__main__":
    asyncio.run(main())