From 962eab7a06b3a240607f7e43b1dbbf74339a69c1 Mon Sep 17 00:00:00 2001 From: pxc Date: Mon, 29 Dec 2025 12:10:26 +0800 Subject: [PATCH 1/9] refactor --- .../workflows/agentscope/react/react_agent.py | 9 ++------- .../workflows/agentscope/react/react_workflow.py | 1 - .../workflows/agentscope/react/templates.py | 16 ++++++++++++++-- 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/trinity/common/workflows/agentscope/react/react_agent.py b/trinity/common/workflows/agentscope/react/react_agent.py index 0cf8babe22..2ec114b405 100644 --- a/trinity/common/workflows/agentscope/react/react_agent.py +++ b/trinity/common/workflows/agentscope/react/react_agent.py @@ -16,7 +16,6 @@ def __init__( model_name: str, system_prompt: str, generate_kwargs: dict, - response_structure: Type[BaseModel], max_iters: int = 10, toolkit: Toolkit | None = None, ): @@ -43,11 +42,9 @@ def __init__( model=self.agent_model, formatter=OpenAIChatFormatter(), # we enable agentscope's meta tool to allow agent to call tools dynamically without pre-registration - enable_meta_tool=True, toolkit=toolkit, max_iters=max_iters, ) - self.response_structure = response_structure async def reply(self, query: str) -> Dict: """Generate a response from the agent given a query. @@ -58,8 +55,6 @@ async def reply(self, query: str) -> Dict: Returns: Dict: The structured response. """ - - response = await self.agent.reply( - Msg("user", query, role="user"), structured_model=self.response_structure + return await self.agent.reply( + msg=Msg("user", query, role="user") ) - return response.metadata or {} diff --git a/trinity/common/workflows/agentscope/react/react_workflow.py b/trinity/common/workflows/agentscope/react/react_workflow.py index 203af35623..4b25f1ece8 100644 --- a/trinity/common/workflows/agentscope/react/react_workflow.py +++ b/trinity/common/workflows/agentscope/react/react_workflow.py @@ -57,7 +57,6 @@ def __init__( "temperature": self.task.rollout_args.temperature, "max_tokens": self.task.rollout_args.max_tokens or 4096, }, - response_structure=template.response_structure, ) async def run_async(self): diff --git a/trinity/common/workflows/agentscope/react/templates.py b/trinity/common/workflows/agentscope/react/templates.py index 0b2ba2fd3f..ff85e90a2c 100644 --- a/trinity/common/workflows/agentscope/react/templates.py +++ b/trinity/common/workflows/agentscope/react/templates.py @@ -1,8 +1,11 @@ +import re from dataclasses import dataclass from typing import Dict, Optional, Type from pydantic import BaseModel, Field +from agentscope.message import Msg + from trinity.common.rewards import RewardFn from trinity.common.rewards.math_reward import MathBoxedRewardFn @@ -19,7 +22,7 @@ class GSM8KResponseStructure(BaseModel): class GSM8KRewardFn(MathBoxedRewardFn): def __call__( # type: ignore [override] self, - response: dict, + response: Msg, truth: str, format_score_coef: float = 0.1, **kwargs, @@ -29,8 +32,17 @@ def __call__( # type: ignore [override] truth = truth.split("####")[1].strip() else: truth = str(truth) + # parse the final answer from the response message + result = response.get_text_content() + if result is not None: + # find the final answer in boxed format + match = re.search(pattern=r"\\boxed\{([^}]*)\}", string=result) + if match: + result = match.group(1).strip() + else: + result = None return super().__call__( - response=response.get("result", ""), + response=result, truth=truth, with_think=False, format_score_coef=format_score_coef, From fceaad4d28c4e4577433d4f4c1dedd825de2436a Mon Sep 17 00:00:00 2001 From: pxc Date: Mon, 29 Dec 2025 14:49:44 +0800 Subject: [PATCH 2/9] add agentscope v1 adapter --- trinity/common/workflows/__init__.py | 3 +- .../workflows/agentscope/react/react_agent.py | 7 +- .../workflows/agentscope/react/templates.py | 11 +- .../common/workflows/agentscope_workflow.py | 104 +++++++++++++++++- 4 files changed, 107 insertions(+), 18 deletions(-) diff --git a/trinity/common/workflows/__init__.py b/trinity/common/workflows/__init__.py index b8496f8f41..ea7390b4a4 100644 --- a/trinity/common/workflows/__init__.py +++ b/trinity/common/workflows/__init__.py @@ -20,8 +20,9 @@ # tool_call "tool_call_workflow": "trinity.common.workflows.customized_toolcall_workflows.ToolCallWorkflow", # agentscope - "agentscope_react_workflow": "trinity.common.workflows.agentscope.react.react_workflow.AgentScopeReActWorkflow", "agentscope_workflow_adapter": "trinity.common.workflows.agentscope_workflow.AgentScopeWorkflowAdapter", + "agentscope_workflow_adapter_v1": "trinity.common.workflows.agentscope_workflow.AgentScopeWorkflowAdapterV1", + "agentscope_react_workflow": "trinity.common.workflows.agentscope.react.react_workflow.AgentScopeReActWorkflow", "agentscope_react_math_workflow": "trinity.common.workflows.envs.agentscope.agentscopev1_react_workflow.AgentScopeReactMathWorkflow", "as_react_workflow": "trinity.common.workflows.agentscope.react.react_workflow.AgentScopeReActWorkflow", "agentscopev0_react_math_workflow": "trinity.common.workflows.envs.agentscope.agentscopev0_react_workflow.AgentScopeV0ReactMathWorkflow", diff --git a/trinity/common/workflows/agentscope/react/react_agent.py b/trinity/common/workflows/agentscope/react/react_agent.py index 2ec114b405..0fb812a21e 100644 --- a/trinity/common/workflows/agentscope/react/react_agent.py +++ b/trinity/common/workflows/agentscope/react/react_agent.py @@ -1,4 +1,4 @@ -from typing import Dict, Type +from typing import Dict import openai from agentscope.agent import ReActAgent @@ -6,7 +6,6 @@ from agentscope.message import Msg from agentscope.model import OpenAIChatModel from agentscope.tool import Toolkit -from pydantic import BaseModel class AgentScopeReActAgent: @@ -55,6 +54,4 @@ async def reply(self, query: str) -> Dict: Returns: Dict: The structured response. """ - return await self.agent.reply( - msg=Msg("user", query, role="user") - ) + return await self.agent.reply(msg=Msg("user", query, role="user")) diff --git a/trinity/common/workflows/agentscope/react/templates.py b/trinity/common/workflows/agentscope/react/templates.py index ff85e90a2c..538efbcabb 100644 --- a/trinity/common/workflows/agentscope/react/templates.py +++ b/trinity/common/workflows/agentscope/react/templates.py @@ -1,10 +1,8 @@ -import re from dataclasses import dataclass from typing import Dict, Optional, Type -from pydantic import BaseModel, Field - from agentscope.message import Msg +from pydantic import BaseModel, Field from trinity.common.rewards import RewardFn from trinity.common.rewards.math_reward import MathBoxedRewardFn @@ -34,13 +32,6 @@ def __call__( # type: ignore [override] truth = str(truth) # parse the final answer from the response message result = response.get_text_content() - if result is not None: - # find the final answer in boxed format - match = re.search(pattern=r"\\boxed\{([^}]*)\}", string=result) - if match: - result = match.group(1).strip() - else: - result = None return super().__call__( response=result, truth=truth, diff --git a/trinity/common/workflows/agentscope_workflow.py b/trinity/common/workflows/agentscope_workflow.py index 0de1c41c2a..846d35b665 100644 --- a/trinity/common/workflows/agentscope_workflow.py +++ b/trinity/common/workflows/agentscope_workflow.py @@ -22,8 +22,8 @@ def __init__( from agentscope.model import TrinityChatModel except ImportError: raise ImportError( - "This workflow requires agentscope >= 0.1.6, please install " - "it via `pip install agentscope>=0.1.6`", + "This workflow requires agentscope >= 1.0.7, please install " + "it via `pip install agentscope>=1.0.7`", ) super().__init__( @@ -72,3 +72,103 @@ async def run_async(self) -> List[Experience]: """Run the workflow asynchronously and return experiences.""" reward = await self.workflow_func(self.task.raw_task, self.chat_model) # type: ignore [arg-type] return self.construct_experiences(reward) + + +class AgentScopeWorkflowAdapterV1(Workflow): + """A more general adapter to wrap agentscope trainable workflow and judge functions into a Trinity Workflow.""" + + is_async: bool = True + + def __init__( + self, + *, + task: Task, + model: ModelWrapper, + auxiliary_models: Optional[List[ModelWrapper]] = None, + ): + """Initialize the adapter with the task and model.""" + try: + from agentscope.model import TrinityChatModel + except ImportError: + raise ImportError( + "This workflow requires agentscope >= 1.0.11, please install " + "it via `pip install agentscope>=1.0.11`", + ) + + super().__init__( + task=task, + model=model, + auxiliary_models=auxiliary_models, + ) + self.workflow_func = task.workflow_args.get("workflow_func", None) + self.judge_func = task.workflow_args.get("judge_func", None) + + if self.workflow_func is None: + raise ValueError( + "The 'workflow_func' is not provided.", + ) + + self.chat_model: TrinityChatModel = TrinityChatModel( + model.get_openai_async_client(), + generate_kwargs={ + "temperature": self.task.rollout_args.temperature, + "top_p": self.task.rollout_args.top_p, + "max_tokens": self.task.rollout_args.max_tokens or 4096, + "logprobs": True, + "top_logprobs": self.task.rollout_args.logprobs, + }, + ) + self.auxiliary_chat_models = [ + TrinityChatModel( + aux_model, + ) + for aux_model in (self.auxiliary_models or []) + ] + + def construct_experiences( + self, + reward: float, + metrics: Dict, + ) -> List[Experience]: + """Construct experiences from the agent's interaction history. + + Args: + reward (float): The reward value to assign to each experience. + + Returns: + List: A list of Experience objects. + """ + exps = self.model.extract_experience_from_history() + for exp in exps: + exp.reward = reward + # only attach metrics to the last experience + if len(metrics) > 0: + exps[-1].metrics = metrics + return exps + + async def run_async(self) -> List[Experience]: + """Run the workflow asynchronously and return experiences.""" + try: + from agentscope.tuner import JudgeOutput, WorkflowOutput + except ImportError: + self.logger.error( + "Fail to import agentscope tuner related types. Please ensure agentscope>=1.0.11 is installed." + ) + + metrics = {} + workflow_output: WorkflowOutput = await self.workflow_func( + self.task.raw_task, self.chat_model, self.auxiliary_chat_models + ) # type: ignore [arg-type] + metrics.update(workflow_output.metrics or {}) + if self.judge_func is not None: + judge_output: JudgeOutput = await self.judge_func( + self.task.raw_task, workflow_output, self.auxiliary_chat_models + ) # type: ignore [arg-type] + reward = judge_output.reward + metrics.update(judge_output.metrics or {}) + else: + assert ( + workflow_output.reward is not None + ), "Either workflow or judge must provide reward." + reward = workflow_output.reward + return self.construct_experiences(reward, metrics) From 5f1d5797b35495ead63fd5f811dd1d30d3dcf8ae Mon Sep 17 00:00:00 2001 From: pxc Date: Mon, 29 Dec 2025 16:00:24 +0800 Subject: [PATCH 3/9] fix type --- trinity/common/workflows/agentscope_workflow.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/trinity/common/workflows/agentscope_workflow.py b/trinity/common/workflows/agentscope_workflow.py index 846d35b665..10d56f9eb0 100644 --- a/trinity/common/workflows/agentscope_workflow.py +++ b/trinity/common/workflows/agentscope_workflow.py @@ -161,8 +161,11 @@ async def run_async(self) -> List[Experience]: ) # type: ignore [arg-type] metrics.update(workflow_output.metrics or {}) if self.judge_func is not None: + assert ( + workflow_output.response is not None + ), "Workflow must provide response for judging." judge_output: JudgeOutput = await self.judge_func( - self.task.raw_task, workflow_output, self.auxiliary_chat_models + self.task.raw_task, workflow_output.response, self.auxiliary_chat_models ) # type: ignore [arg-type] reward = judge_output.reward metrics.update(judge_output.metrics or {}) From bcb59accaef2d8fa9b89be3ef55a2979883614ba Mon Sep 17 00:00:00 2001 From: pxc Date: Mon, 29 Dec 2025 16:04:15 +0800 Subject: [PATCH 4/9] reset workflow --- .../common/workflows/agentscope/react/react_agent.py | 12 ++++++++++-- .../workflows/agentscope/react/react_workflow.py | 1 + .../common/workflows/agentscope/react/templates.py | 7 ++----- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/trinity/common/workflows/agentscope/react/react_agent.py b/trinity/common/workflows/agentscope/react/react_agent.py index 0fb812a21e..0cf8babe22 100644 --- a/trinity/common/workflows/agentscope/react/react_agent.py +++ b/trinity/common/workflows/agentscope/react/react_agent.py @@ -1,4 +1,4 @@ -from typing import Dict +from typing import Dict, Type import openai from agentscope.agent import ReActAgent @@ -6,6 +6,7 @@ from agentscope.message import Msg from agentscope.model import OpenAIChatModel from agentscope.tool import Toolkit +from pydantic import BaseModel class AgentScopeReActAgent: @@ -15,6 +16,7 @@ def __init__( model_name: str, system_prompt: str, generate_kwargs: dict, + response_structure: Type[BaseModel], max_iters: int = 10, toolkit: Toolkit | None = None, ): @@ -41,9 +43,11 @@ def __init__( model=self.agent_model, formatter=OpenAIChatFormatter(), # we enable agentscope's meta tool to allow agent to call tools dynamically without pre-registration + enable_meta_tool=True, toolkit=toolkit, max_iters=max_iters, ) + self.response_structure = response_structure async def reply(self, query: str) -> Dict: """Generate a response from the agent given a query. @@ -54,4 +58,8 @@ async def reply(self, query: str) -> Dict: Returns: Dict: The structured response. """ - return await self.agent.reply(msg=Msg("user", query, role="user")) + + response = await self.agent.reply( + Msg("user", query, role="user"), structured_model=self.response_structure + ) + return response.metadata or {} diff --git a/trinity/common/workflows/agentscope/react/react_workflow.py b/trinity/common/workflows/agentscope/react/react_workflow.py index 4b25f1ece8..203af35623 100644 --- a/trinity/common/workflows/agentscope/react/react_workflow.py +++ b/trinity/common/workflows/agentscope/react/react_workflow.py @@ -57,6 +57,7 @@ def __init__( "temperature": self.task.rollout_args.temperature, "max_tokens": self.task.rollout_args.max_tokens or 4096, }, + response_structure=template.response_structure, ) async def run_async(self): diff --git a/trinity/common/workflows/agentscope/react/templates.py b/trinity/common/workflows/agentscope/react/templates.py index 538efbcabb..0b2ba2fd3f 100644 --- a/trinity/common/workflows/agentscope/react/templates.py +++ b/trinity/common/workflows/agentscope/react/templates.py @@ -1,7 +1,6 @@ from dataclasses import dataclass from typing import Dict, Optional, Type -from agentscope.message import Msg from pydantic import BaseModel, Field from trinity.common.rewards import RewardFn @@ -20,7 +19,7 @@ class GSM8KResponseStructure(BaseModel): class GSM8KRewardFn(MathBoxedRewardFn): def __call__( # type: ignore [override] self, - response: Msg, + response: dict, truth: str, format_score_coef: float = 0.1, **kwargs, @@ -30,10 +29,8 @@ def __call__( # type: ignore [override] truth = truth.split("####")[1].strip() else: truth = str(truth) - # parse the final answer from the response message - result = response.get_text_content() return super().__call__( - response=result, + response=response.get("result", ""), truth=truth, with_think=False, format_score_coef=format_score_coef, From 3a01291843b5f31d8e8c9daa023cd69ae4f60569 Mon Sep 17 00:00:00 2001 From: pxc Date: Mon, 29 Dec 2025 16:38:20 +0800 Subject: [PATCH 5/9] fix comments --- trinity/common/workflows/agentscope_workflow.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/trinity/common/workflows/agentscope_workflow.py b/trinity/common/workflows/agentscope_workflow.py index 10d56f9eb0..411153661f 100644 --- a/trinity/common/workflows/agentscope_workflow.py +++ b/trinity/common/workflows/agentscope_workflow.py @@ -120,7 +120,8 @@ def __init__( ) self.auxiliary_chat_models = [ TrinityChatModel( - aux_model, + openai_async_client=aux_model, + # TODO: customize generate_kwargs for auxiliary models if needed ) for aux_model in (self.auxiliary_models or []) ] @@ -134,6 +135,7 @@ def construct_experiences( Args: reward (float): The reward value to assign to each experience. + metrics (Dict): A dictionary of metrics to be attached to the last experience. Returns: List: A list of Experience objects. From 647e6a75da0b5040b333ddb38cc3b5d427abb8fc Mon Sep 17 00:00:00 2001 From: pxc Date: Mon, 29 Dec 2025 16:48:21 +0800 Subject: [PATCH 6/9] fix comments --- trinity/common/workflows/agentscope_workflow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/trinity/common/workflows/agentscope_workflow.py b/trinity/common/workflows/agentscope_workflow.py index 411153661f..0a5254c8ec 100644 --- a/trinity/common/workflows/agentscope_workflow.py +++ b/trinity/common/workflows/agentscope_workflow.py @@ -144,7 +144,7 @@ def construct_experiences( for exp in exps: exp.reward = reward # only attach metrics to the last experience - if len(metrics) > 0: + if len(exps) > 0: exps[-1].metrics = metrics return exps From 7c0494a5e68c5e63a142cbf156d828e077d26300 Mon Sep 17 00:00:00 2001 From: pxc Date: Mon, 29 Dec 2025 16:50:27 +0800 Subject: [PATCH 7/9] fix comment --- trinity/common/workflows/agentscope_workflow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/trinity/common/workflows/agentscope_workflow.py b/trinity/common/workflows/agentscope_workflow.py index 0a5254c8ec..a29594c1c0 100644 --- a/trinity/common/workflows/agentscope_workflow.py +++ b/trinity/common/workflows/agentscope_workflow.py @@ -153,7 +153,7 @@ async def run_async(self) -> List[Experience]: try: from agentscope.tuner import JudgeOutput, WorkflowOutput except ImportError: - self.logger.error( + raise ImportError( "Fail to import agentscope tuner related types. Please ensure agentscope>=1.0.11 is installed." ) From d241e0eff0759120d4a142cd373738906e694ee6 Mon Sep 17 00:00:00 2001 From: pxc Date: Mon, 29 Dec 2025 18:06:16 +0800 Subject: [PATCH 8/9] remove dup --- trinity/common/workflows/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/trinity/common/workflows/__init__.py b/trinity/common/workflows/__init__.py index ea7390b4a4..d8f5ba102e 100644 --- a/trinity/common/workflows/__init__.py +++ b/trinity/common/workflows/__init__.py @@ -22,7 +22,6 @@ # agentscope "agentscope_workflow_adapter": "trinity.common.workflows.agentscope_workflow.AgentScopeWorkflowAdapter", "agentscope_workflow_adapter_v1": "trinity.common.workflows.agentscope_workflow.AgentScopeWorkflowAdapterV1", - "agentscope_react_workflow": "trinity.common.workflows.agentscope.react.react_workflow.AgentScopeReActWorkflow", "agentscope_react_math_workflow": "trinity.common.workflows.envs.agentscope.agentscopev1_react_workflow.AgentScopeReactMathWorkflow", "as_react_workflow": "trinity.common.workflows.agentscope.react.react_workflow.AgentScopeReActWorkflow", "agentscopev0_react_math_workflow": "trinity.common.workflows.envs.agentscope.agentscopev0_react_workflow.AgentScopeV0ReactMathWorkflow", From 2ab9fc02da21ec5b59aaef807bc6edf31c6ba905 Mon Sep 17 00:00:00 2001 From: pxc Date: Mon, 29 Dec 2025 18:14:56 +0800 Subject: [PATCH 9/9] rename workflow --- examples/agentscope_react/gsm8k.yaml | 2 +- trinity/common/workflows/__init__.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/agentscope_react/gsm8k.yaml b/examples/agentscope_react/gsm8k.yaml index c1b79f7016..102606c4ba 100644 --- a/examples/agentscope_react/gsm8k.yaml +++ b/examples/agentscope_react/gsm8k.yaml @@ -29,7 +29,7 @@ buffer: response_key: 'answer' rollout_args: temperature: 1.0 - default_workflow_type: 'as_react_workflow' + default_workflow_type: 'agentscope_react_workflow' eval_tasksets: [] trainer_input: experience_buffer: diff --git a/trinity/common/workflows/__init__.py b/trinity/common/workflows/__init__.py index d8f5ba102e..ea7390b4a4 100644 --- a/trinity/common/workflows/__init__.py +++ b/trinity/common/workflows/__init__.py @@ -22,6 +22,7 @@ # agentscope "agentscope_workflow_adapter": "trinity.common.workflows.agentscope_workflow.AgentScopeWorkflowAdapter", "agentscope_workflow_adapter_v1": "trinity.common.workflows.agentscope_workflow.AgentScopeWorkflowAdapterV1", + "agentscope_react_workflow": "trinity.common.workflows.agentscope.react.react_workflow.AgentScopeReActWorkflow", "agentscope_react_math_workflow": "trinity.common.workflows.envs.agentscope.agentscopev1_react_workflow.AgentScopeReactMathWorkflow", "as_react_workflow": "trinity.common.workflows.agentscope.react.react_workflow.AgentScopeReActWorkflow", "agentscopev0_react_math_workflow": "trinity.common.workflows.envs.agentscope.agentscopev0_react_workflow.AgentScopeV0ReactMathWorkflow",