Fix native tool execution for goals with MCP (#58)

* Frontend UI: configurable error timeout

* Fixed a bug where goals with MCP stopped native tools from executing
This commit is contained in:
Steve Androulakis
2025-09-28 12:20:36 -07:00
committed by GitHub
parent e248a6778d
commit 98a1b75dff
3 changed files with 122 additions and 12 deletions

View File

@@ -27,7 +27,7 @@ goal_food_ordering = AgentGoal(
"When they express interest in items, get pricing using list_prices. " "When they express interest in items, get pricing using list_prices. "
"Add items to their cart using AddToCart as they decide - the order doesn't matter, multiple items can be added. " "Add items to their cart using AddToCart as they decide - the order doesn't matter, multiple items can be added. "
"After they're done selecting items, get their customer details and create a Stripe customer. " "After they're done selecting items, get their customer details and create a Stripe customer. "
"For checkout: 1) create_invoice, 2) create_invoice_item for each individual item (IMPORTANT: create_invoice_item does NOT accept quantity parameter - call it once per item, so if user wants 2 pizzas, call create_invoice_item twice with the same price), " "For checkout: 1) create_invoice (always include days_until_due so the invoice has a due date, e.g., days_until_due=7), 2) create_invoice_item for each individual item (IMPORTANT: create_invoice_item does NOT accept quantity parameter - call it once per item, so if user wants 2 pizzas, call create_invoice_item twice with the same price), "
"3) finalize_invoice. The finalized invoice will contain a hosted_invoice_url for payment.", "3) finalize_invoice. The finalized invoice will contain a hosted_invoice_url for payment.",
starter_prompt=starter_prompt_generic, starter_prompt=starter_prompt_generic,
example_conversation_history="\n ".join( example_conversation_history="\n ".join(
@@ -59,8 +59,8 @@ goal_food_ordering = AgentGoal(
"agent: Thank you Jane! Creating your Stripe customer profile with name and email.", "agent: Thank you Jane! Creating your Stripe customer profile with name and email.",
"user_confirmed_tool_run: <user clicks confirm on create_customer tool with name='Jane Smith' and email='jane.smith@example.com'>", "user_confirmed_tool_run: <user clicks confirm on create_customer tool with name='Jane Smith' and email='jane.smith@example.com'>",
'tool_result: {"customer": {"id": "cus_ExAmPlE12345", "name": "Jane Smith", "email": "jane.smith@example.com"}}', 'tool_result: {"customer": {"id": "cus_ExAmPlE12345", "name": "Jane Smith", "email": "jane.smith@example.com"}}',
"agent: Customer profile created! Now I'll create a draft invoice for your order.", "agent: Customer profile created! Now I'll create a draft invoice for your order with payment due in 7 days.",
"user_confirmed_tool_run: <user clicks confirm on create_invoice tool with customer='cus_ExAmPlE12345'>", "user_confirmed_tool_run: <user clicks confirm on create_invoice tool with customer='cus_ExAmPlE12345', days_until_due=7>",
'tool_result: {"invoice": {"id": "in_InvOicE54321", "status": "draft", "customer": "cus_ExAmPlE12345"}}', 'tool_result: {"invoice": {"id": "in_InvOicE54321", "status": "draft", "customer": "cus_ExAmPlE12345"}}',
"agent: Invoice created! Now adding your first Pepperoni Pizza to the invoice.", "agent: Invoice created! Now adding your first Pepperoni Pizza to the invoice.",
"user_confirmed_tool_run: <user clicks confirm on create_invoice_item tool with customer='cus_ExAmPlE12345', invoice='in_InvOicE54321', price='price_pepperoni_large'>", "user_confirmed_tool_run: <user clicks confirm on create_invoice_item tool with customer='cus_ExAmPlE12345', invoice='in_InvOicE54321', price='price_pepperoni_large'>",

View File

@@ -312,6 +312,109 @@ async def test_mcp_tool_execution_flow(client: Client):
assert captured["dynamic_args"]["server_definition"]["name"] == server_def.name assert captured["dynamic_args"]["server_definition"]["name"] == server_def.name
@pytest.mark.asyncio
async def test_create_invoice_defaults_days_until_due(client: Client):
    """Check that create_invoice receives ``days_until_due`` when the planner omits it.

    The mocked planner asks for ``create_invoice`` with only a ``customer``
    argument. After the confirm signal, the payload captured by the mocked
    dynamic tool activity must contain ``days_until_due == 7``.
    """
    queue_name = str(uuid.uuid4())
    mcp_server = MCPServerDefinition(name="test", command="python", args=["srv.py"])
    workflow_input = CombinedInput(
        agent_goal=AgentGoal(
            id="g_invoice_default",
            category_tag="food",
            agent_name="agent",
            agent_friendly_description="",
            description="",
            tools=[],
            starter_prompt="",
            example_conversation_history="",
            mcp_server_definition=mcp_server,
        ),
        tool_params=AgentGoalWorkflowParams(
            conversation_summary=None, prompt_queue=deque()
        ),
    )

    # Shared scratch space the mocked activities write into.
    captured: dict = {}

    @activity.defn(name="get_wf_env_vars")
    async def mock_get_wf_env_vars(input: EnvLookupInput) -> EnvLookupOutput:
        return EnvLookupOutput(show_confirm=True, multi_goal_mode=True)

    @activity.defn(name="agent_validatePrompt")
    async def mock_validate(prompt: ValidationInput) -> ValidationResult:
        return ValidationResult(validationResult=True, validationFailedReason={})

    @activity.defn(name="agent_toolPlanner")
    async def mock_planner(input: ToolPromptInput) -> dict:
        # Only the first planner call requests the tool; later calls end the run.
        if "planner_called" in captured:
            return {"next": "done", "response": "done"}
        captured["planner_called"] = True
        return {
            "next": "confirm",
            "tool": "create_invoice",
            # days_until_due is deliberately left out of the planned args.
            "args": {"customer": "cus_123"},
            "response": "Creating invoice",
        }

    @activity.defn(name="mcp_list_tools")
    async def mock_mcp_list_tools(
        server_definition: MCPServerDefinition, include_tools=None
    ):
        create_invoice_tool = {
            "name": "create_invoice",
            "description": "",
            "inputSchema": {
                "properties": {
                    "customer": {"type": "string"},
                    "days_until_due": {"type": "number"},
                }
            },
        }
        return {
            "server_name": server_definition.name,
            "success": True,
            "tools": {"create_invoice": create_invoice_tool},
            "total_available": 1,
            "filtered_count": 1,
        }

    @activity.defn(name="dynamic_tool_activity", dynamic=True)
    async def mock_dynamic_tool_activity(args: Sequence[RawValue]) -> dict:
        # Decode the first raw argument and keep it for the assertions below.
        decoded = activity.payload_converter().from_payload(args[0].payload, dict)
        captured["dynamic_args"] = decoded
        return {"tool": "create_invoice", "success": True, "content": {"ok": True}}

    mock_activities = [
        mock_get_wf_env_vars,
        mock_validate,
        mock_planner,
        mock_mcp_list_tools,
        mock_dynamic_tool_activity,
    ]

    async with Worker(
        client,
        task_queue=queue_name,
        workflows=[AgentGoalWorkflow],
        activities=mock_activities,
    ):
        handle = await client.start_workflow(
            AgentGoalWorkflow.run,
            workflow_input,
            id=str(uuid.uuid4()),
            task_queue=queue_name,
        )

        await handle.signal(AgentGoalWorkflow.user_prompt, "make invoice")
        await asyncio.sleep(0.5)
        await handle.signal(AgentGoalWorkflow.confirm)
        await asyncio.sleep(0.5)
        await handle.result()

    assert "dynamic_args" in captured
    assert captured["dynamic_args"]["days_until_due"] == 7
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_mcp_tool_failure_recorded(client: Client): async def test_mcp_tool_failure_recorded(client: Client):
"""Failure of an MCP tool should be recorded in conversation history.""" """Failure of an MCP tool should be recorded in conversation history."""

View File

@@ -1,4 +1,3 @@
import inspect
from datetime import timedelta from datetime import timedelta
from typing import Any, Deque, Dict from typing import Any, Deque, Dict
@@ -22,18 +21,19 @@ LLM_ACTIVITY_SCHEDULE_TO_CLOSE_TIMEOUT = timedelta(minutes=30)
def is_mcp_tool(tool_name: str, goal: AgentGoal) -> bool: def is_mcp_tool(tool_name: str, goal: AgentGoal) -> bool:
"""Check if a tool is an MCP tool based on the goal's MCP server definition""" """Check if a tool should be dispatched via MCP."""
if not goal.mcp_server_definition: if not goal.mcp_server_definition:
return False return False
# Identify MCP tools by checking if they're not in the original static tools # Native tools are registered with tools.get_handler. If lookup succeeds,
import tools.tool_registry # the tool should execute locally; otherwise treat it as MCP-provided.
from tools import get_handler
return not any( try:
tool.name == tool_name get_handler(tool_name)
for _, tool in inspect.getmembers(tools.tool_registry) return False
if isinstance(tool, ToolDefinition) except ValueError:
) return True
async def handle_tool_execution( async def handle_tool_execution(
@@ -54,6 +54,13 @@ async def handle_tool_execution(
# Add server definition to args for MCP tools # Add server definition to args for MCP tools
mcp_args = tool_data["args"].copy() mcp_args = tool_data["args"].copy()
# Stripe's MCP server enforces days_until_due when the collection
# method defaults to send_invoice. Provide a reasonable default when
# the planner omits it so invoice creation doesn't fail upstream.
if current_tool == "create_invoice" and "days_until_due" not in mcp_args:
mcp_args["days_until_due"] = 7
mcp_args["server_definition"] = goal.mcp_server_definition mcp_args["server_definition"] = goal.mcp_server_definition
dynamic_result = await workflow.execute_activity( dynamic_result = await workflow.execute_activity(