mucho fixes and prompt engineering

2026-03-15 14:08:08 +01:00 · 2025-04-24 20:16:24 -07:00
parent a5fc6ad477
commit a29b100017
5 changed files with 173 additions and 103 deletions
--- a/prompts/agent_prompt_generators.py
+++ b/prompts/agent_prompt_generators.py
@@ -2,10 +2,14 @@ from models.tool_definitions import AgentGoal
 from typing import Optional
 import json

-MULTI_GOAL_MODE:bool = None
+MULTI_GOAL_MODE: bool = None
+

 def generate_genai_prompt(
-    agent_goal: AgentGoal, conversation_history: str, multi_goal_mode:bool, raw_json: Optional[str] = None
+    agent_goal: AgentGoal,
+    conversation_history: str,
+    multi_goal_mode: bool,
+    raw_json: Optional[str] = None,
 ) -> str:
    """
    Generates a concise prompt for producing or validating JSON instructions
@@ -25,10 +29,12 @@ def generate_genai_prompt(
    prompt_lines.append(
        "This is the ongoing history to determine which tool and arguments to gather:"
    )
-    prompt_lines.append("BEGIN CONVERSATION HISTORY")
+    prompt_lines.append("*BEGIN CONVERSATION HISTORY*")
    prompt_lines.append(json.dumps(conversation_history, indent=2))
-    prompt_lines.append("END CONVERSATION HISTORY")
-    prompt_lines.append("")
+    prompt_lines.append("*END CONVERSATION HISTORY*")
+    prompt_lines.append(
+        "REMINDER: You can use the conversation history to infer arguments for the tools."
+    )

    # Example Conversation History (from agent_goal)
    if agent_goal.example_conversation_history:
@@ -84,7 +90,20 @@ def generate_genai_prompt(
        "2) If all required arguments are known, set next='confirm' and specify the tool.\n"
        "   The user will confirm before the tool is run.\n"
        f"3) {generate_toolchain_complete_guidance()}\n"
-        "4) response should be short and user-friendly.\n"
+        "4) response should be short and user-friendly.\n\n"
+        "Guardrails (always remember!)\n"
+        "1) If any required argument is missing, set next='question' and ask the user.\n"
+        "2) ALWAYS ask a question in your response if next='question'.\n"
+        "3) ALWAYS set next='confirm' if you have arguments\n "
+        'And respond with "let\'s proceed with <tool> (and any other useful info)" \n '
+        "4) You can carry over arguments from one tool to another.\n "
+        "EXAMPLE: If you asked for an account ID, then use the conversation history to infer that argument "
+        "going forward.\n"
+        "5) If ListAgents in the conversation history is force_confirm='False', you MUST check "
+        + "if the current tool contains userConfirmation. If it does, please ask the user to confirm the details. "
+        + "userConfirmation overrides force_confirm='False'.\n"
+        + "EXAMPLE: (force_confirm='False' AND userConfirmation exists on tool) Would you like me to <run tool> "
+        + "with the following details: <details>?\n"
    )

    # Validation Task (If raw_json is provided)
@@ -110,14 +129,15 @@ def generate_genai_prompt(

    return "\n".join(prompt_lines)

+
 def generate_tool_completion_prompt(current_tool: str, dynamic_result: dict) -> str:
    """
    Generates a prompt for handling tool completion and determining next steps.
-    
+
    Args:
        current_tool: The name of the tool that just completed
        dynamic_result: The result data from the tool execution
-        
+
    Returns:
        str: A formatted prompt string for the agent to process the tool completion
    """
@@ -132,15 +152,18 @@ def generate_tool_completion_prompt(current_tool: str, dynamic_result: dict) ->
        f"{generate_pick_new_goal_guidance()}"
    )

-def generate_missing_args_prompt(current_tool: str, tool_data: dict, missing_args: list[str]) -> str:
+
+def generate_missing_args_prompt(
+    current_tool: str, tool_data: dict, missing_args: list[str]
+) -> str:
    """
    Generates a prompt for handling missing arguments for a tool.
-    
+
    Args:
        current_tool: The name of the tool that needs arguments
        tool_data: The current tool data containing the response
        missing_args: List of argument names that are missing
-        
+
    Returns:
        str: A formatted prompt string for requesting missing arguments
    """
@@ -150,13 +173,14 @@ def generate_missing_args_prompt(current_tool: str, tool_data: dict, missing_arg
        "Only provide a valid JSON response without any comments or metadata."
    )

-def set_multi_goal_mode_if_unset(mode:bool)->None:
+
+def set_multi_goal_mode_if_unset(mode: bool) -> None:
    """
    Set multi-mode (used to pass workflow)
-    
+
    Args:
        None
-        
+
    Returns:
        bool: True if in multi-goal mode, false if not
    """
@@ -164,44 +188,47 @@ def set_multi_goal_mode_if_unset(mode:bool)->None:
    if MULTI_GOAL_MODE is None:
        MULTI_GOAL_MODE = mode

-def is_multi_goal_mode()-> bool:
+
+def is_multi_goal_mode() -> bool:
    """
    Centralized logic for if we're in multi-goal mode.
-    
+
    Args:
        None
-        
+
    Returns:
        bool: True if in multi-goal mode, false if not
    """
    return MULTI_GOAL_MODE

-def generate_pick_new_goal_guidance()-> str:
+
+def generate_pick_new_goal_guidance() -> str:
    """
    Generates a prompt for guiding the LLM to pick a new goal or be done depending on multi-goal mode.
-    
+
    Args:
        None
-        
+
    Returns:
        str: A prompt string prompting the LLM to when to go to pick-new-goal
    """
-    if is_multi_goal_mode(): 
+    if is_multi_goal_mode():
        return 'Next should only be "pick-new-goal" if all tools have been run for the current goal (use the system prompt to figure that out), or the user explicitly requested to pick a new goal.'
-    else: 
+    else:
        return 'Next should never be "pick-new-goal".'

+
 def generate_toolchain_complete_guidance() -> str:
    """
    Generates a prompt for guiding the LLM to handle the end of the toolchain.
-    
+
    Args:
        None
-        
+
    Returns:
        str: A prompt string prompting the LLM to prompt for a new goal, or be done
    """
-    if is_multi_goal_mode(): 
+    if is_multi_goal_mode():
        return "If no more tools are needed (user_confirmed_tool_run has been run for all), set next='confirm' and tool='ListAgents'."
-    else :
-        return "If no more tools are needed (user_confirmed_tool_run has been run for all), set next='done' and tool=''."
+    else:
+        return "If no more tools are needed (user_confirmed_tool_run has been run for all), set next='done' and tool=''."
--- a/tools/init.py
+++ b/tools/init.py
@@ -56,17 +56,17 @@ def get_handler(tool_name: str):
    if tool_name == "FinCheckAccountIsValid":
        return check_account_valid
    if tool_name == "FinCheckAccountBalance":
-        return get_account_balance    
-    if tool_name == "FinMoveMoneyOrder":
+        return get_account_balance
+    if tool_name == "FinMoveMoney":
        return move_money
    if tool_name == "FinCheckAccountSubmitLoanApproval":
-        return submit_loan_application    
+        return submit_loan_application
    if tool_name == "GetOrder":
        return get_order
    if tool_name == "TrackPackage":
        return track_package
    if tool_name == "ListOrders":
-        return list_orders     
+        return list_orders
    if tool_name == "GiveHint":
        return give_hint
    if tool_name == "GuessLocation":
--- a/tools/data/customer_account_data.json
+++ b/tools/data/customer_account_data.json
@@ -1,10 +1,10 @@
-{ 
+{
    "accounts": [
        {
            "name": "Matt Murdock",
            "email": "matt.murdock@nelsonmurdock.com",
            "account_id": "11235",
-            "checking_balance": 875.40,
+            "checking_balance": 875.4,
            "savings_balance": 3200.15,
            "bitcoin_balance": 0.1378,
            "account_creation_date": "2014-03-10"
@@ -13,8 +13,8 @@
            "name": "Foggy Nelson",
            "email": "foggy.nelson@nelsonmurdock.com",
            "account_id": "112358",
-            "checking_balance": 1523.67,
-            "savings_balance": 4875.90,
+            "checking_balance": "1523.66",
+            "savings_balance": "4875.89",
            "bitcoin_balance": 0.0923,
            "account_creation_date": "2014-03-10"
        },
@@ -23,7 +23,7 @@
            "email": "karen.page@nelsonmurdock.com",
            "account_id": "112",
            "checking_balance": 645.25,
-            "savings_balance": 1830.50,
+            "savings_balance": "830.5",
            "bitcoin_balance": 0.0456,
            "account_creation_date": "2015-01-15"
        },
@@ -31,7 +31,7 @@
            "name": "Wilson Fisk",
            "email": "wilson.fisk@fiskcorp.com",
            "account_id": "11",
-            "checking_balance": 25000.00,
+            "checking_balance": 25000.0,
            "savings_balance": 150000.75,
            "bitcoin_balance": 5987.6721,
            "account_creation_date": "2013-09-20"
@@ -40,8 +40,8 @@
            "name": "Frank Castle",
            "email": "frank.castle@vigilante.net",
            "account_id": "1",
-            "checking_balance": 320.10,
-            "savings_balance": 0.30,
+            "checking_balance": 320.1,
+            "savings_balance": 0.3,
            "bitcoin_balance": 15.2189,
            "account_creation_date": "2016-02-05"
        },
@@ -49,8 +49,8 @@
            "name": "Joshua Smith",
            "email": "joshmsmith@gmail.com",
            "account_id": "11235813",
-            "checking_balance": 3021.90,
-            "savings_balance": 500.50,
+            "checking_balance": 3021.9,
+            "savings_balance": 500.5,
            "bitcoin_balance": 0.001,
            "account_creation_date": "2020-03-19"
        }
--- a/tools/fin/move_money.py
+++ b/tools/fin/move_money.py
@@ -11,7 +11,7 @@ from shared.config import get_temporal_client

 from enum import Enum, auto

-#enums for the java enum
+# enums for the java enum
 # class ExecutionScenarios(Enum):
 #     HAPPY_PATH = 0
 #     ADVANCED_VISIBILITY = auto() # 1
@@ -20,6 +20,7 @@ from enum import Enum, auto
 #     BUG_IN_WORKFLOW = auto()     # 4
 #     INVALID_ACCOUNT = auto()     # 5

+
 # these dataclasses are for calling the Temporal Workflow
 # Python equivalent of the workflow we're calling's Java WorkflowParameterObj
@dataclass
@@ -27,103 +28,130 @@ class MoneyMovementWorkflowParameterObj:
    amount: int  # Using snake_case as per Python conventions
    scenario: str

+
 # this is made to demonstrate functionality but it could just as durably be an API call
 # this assumes it's a valid account - use check_account_valid() to verify that first
 async def move_money(args: dict) -> dict:
-    
+
    account_key = args.get("email_address_or_account_ID")
    account_type: str = args.get("accounttype")
    amount = args.get("amount")
    destinationaccount = args.get("destinationaccount")

-    file_path = Path(__file__).resolve().parent.parent / "data" / "customer_account_data.json"
+    file_path = (
+        Path(__file__).resolve().parent.parent / "data" / "customer_account_data.json"
+    )
    if not file_path.exists():
        return {"error": "Data file not found."}
-    
-    # todo validate there's enough money in the account
+
    with open(file_path, "r") as file:
        data = json.load(file)
    account_list = data["accounts"]

    for account in account_list:
        if account["email"] == account_key or account["account_id"] == account_key:
-            amount_str: str = str(amount)  # LLM+python gets sassy about types but we need it to be str
+            amount_str: str = str(amount)
            from_account_combo = account_key + account_type

-            transfer_workflow_id = await start_workflow(amount_cents=str_dollars_to_cents(amount_str),from_account_name=from_account_combo, to_account_name=destinationaccount)
-            
-            account_type_key = 'checking_balance'
-            if(account_type.casefold() == "checking" ):
-                account_type = "checking"
-                account_type_key = 'checking_balance'
-                
-            elif(account_type.casefold() == "savings" ):
-                account_type = "savings"
-                account_type_key = 'savings_balance'
-            else: 
-                raise NotImplementedError("money order for account types other than checking or savings is not implemented.")
-            
-            new_balance: float = float(str_dollars_to_cents(str(account[account_type_key]))) 
-            new_balance = new_balance - float(str_dollars_to_cents(amount_str))
-            account[account_type_key] = str(new_balance / 100 ) #to dollars
-            with open(file_path, 'w') as file:
-                json.dump(data, file, indent=4)            
+            transfer_workflow_id = await start_workflow(
+                amount_cents=str_dollars_to_cents(amount_str),
+                from_account_name=from_account_combo,
+                to_account_name=destinationaccount,
+            )
+
+            if account_type.casefold() == "checking":
+                from_key = "checking_balance"
+            elif account_type.casefold() == "savings":
+                from_key = "savings_balance"
+            else:
+                return_msg = "Money order for account types other than checking or savings is not implemented."
+                return {"error": return_msg}
+
+            to_key = (
+                "savings_balance"
+                if destinationaccount.casefold() == "savings"
+                else "checking_balance"
+            )
+
+            # Update from-account balance
+            from_balance = float(str_dollars_to_cents(str(account[from_key])))
+            from_balance -= float(str_dollars_to_cents(amount_str))
+            account[from_key] = str(from_balance / 100)
+
+            # Update destination-account balance
+            to_balance = float(str_dollars_to_cents(str(account[to_key])))
+            to_balance += float(str_dollars_to_cents(amount_str))
+            account[to_key] = str(to_balance / 100)
+
+            with open(file_path, "w") as file:
+                json.dump(data, file, indent=4)
+
+            return {
+                "status": "money movement complete",
+                "confirmation id": transfer_workflow_id,
+                "new_balance": account[from_key],
+                "destination_balance": account[to_key],
+            }

-            return {'status': "money movement complete", 'confirmation id': transfer_workflow_id, 'new_balance': account[account_type_key]}                                
-        
    return_msg = "Account not found with for " + account_key
    return {"error": return_msg}

+
 # Async function to start workflow
-async def start_workflow(amount_cents: int, from_account_name: str, to_account_name: str)-> str:
- 
+async def start_workflow(
+    amount_cents: int, from_account_name: str, to_account_name: str
+) -> str:
+
    start_real_workflow = os.getenv("FIN_START_REAL_WORKFLOW")
    if start_real_workflow is not None and start_real_workflow.lower() == "false":
        START_REAL_WORKFLOW = False
    else:
        START_REAL_WORKFLOW = True
-    
+
    if START_REAL_WORKFLOW:
-        # Connect to Temporal 
+        # Connect to Temporal
        client = await get_temporal_client()
        # Create the parameter object
        params = MoneyMovementWorkflowParameterObj(
-            amount=amount_cents,  
-            scenario="HAPPY_PATH"
+            amount=amount_cents, scenario="HAPPY_PATH"
        )

-        workflow_id="TRANSFER-ACCT-" + from_account_name +  "-TO-" + to_account_name  # business-relevant workflow ID
+        workflow_id = (
+            "TRANSFER-ACCT-" + from_account_name + "-TO-" + to_account_name
+        )  # business-relevant workflow ID

-        try: 
+        try:
            handle = await client.start_workflow(
                "moneyTransferWorkflow",  # Workflow name
-                params,          # Workflow parameters
+                params,  # Workflow parameters
                id=workflow_id,
-                task_queue="MoneyTransferJava"  # Task queue name
+                task_queue="MoneyTransferJava",  # Task queue name
            )
            return handle.id
        except WorkflowAlreadyStartedError as e:
            existing_handle = client.get_workflow_handle(workflow_id=workflow_id)
            return existing_handle.id
-    else: 
-        return "TRANSFER-ACCT-" + from_account_name +  "-TO-" + to_account_name + "not-real"
+    else:
+        return (
+            "TRANSFER-ACCT-" + from_account_name + "-TO-" + to_account_name + "not-real"
+        )

-    
-#cleans a string dollar amount description to cents value
+
+# cleans a string dollar amount description to cents value
 def str_dollars_to_cents(dollar_str: str) -> int:
    try:
        # Remove '$' and any whitespace
-        cleaned_str = dollar_str.replace('$', '').strip()
-        
+        cleaned_str = dollar_str.replace("$", "").strip()
+
        # Handle empty string or invalid input
        if not cleaned_str:
            raise ValueError("Empty amount provided")
-            
+
        # Convert to float and then to cents
        amount = float(cleaned_str)
        if amount < 0:
            raise ValueError("Negative amounts not allowed")
-            
+
        return int(amount * 100)
    except ValueError as e:
-        raise ValueError(f"Invalid dollar amount format: {dollar_str}") from e
+        raise ValueError(f"Invalid dollar amount format: {dollar_str}") from e
--- a/tools/tool_registry.py
+++ b/tools/tool_registry.py
@@ -1,4 +1,5 @@
 from models.tool_definitions import ToolDefinition, ToolArgument
+
 # ----- System tools -----
 list_agents_tool = ToolDefinition(
    name="ListAgents",
@@ -21,12 +22,13 @@ change_goal_tool = ToolDefinition(
 give_hint_tool = ToolDefinition(
    name="GiveHint",
    description="Give a hint to the user regarding the location of the pirate treasure. Use previous conversation to determine the hint_total, it should initially be 0 ",
-    arguments=[        
+    arguments=[
        ToolArgument(
            name="hint_total",
            type="number",
            description="How many hints have been given",
-        ),],
+        ),
+    ],
 )

 guess_location_tool = ToolDefinition(
@@ -54,7 +56,8 @@ guess_location_tool = ToolDefinition(
 # ----- Travel use cases tools -----
 search_flights_tool = ToolDefinition(
    name="SearchFlights",
-    description="Search for return flights from an origin to a destination within a date range (dateDepart, dateReturn).",
+    description="Search for return flights from an origin to a destination within a date range (dateDepart, dateReturn). "
+    "You are allowed to suggest dates from the conversation history, but ALWAYS ask the user if ok.",
    arguments=[
        ToolArgument(
            name="origin",
@@ -76,6 +79,12 @@ search_flights_tool = ToolDefinition(
            type="ISO8601",
            description="End of date range in human readable format, when you want to return",
        ),
+        ToolArgument(
+            name="userConfirmation",
+            type="string",
+            description="Indication of the user's desire to search flights, and to confirm the details "
+            + "before moving on to the next step",
+        ),
    ],
 )

@@ -115,6 +124,11 @@ book_trains_tool = ToolDefinition(
            type="string",
            description="The IDs of the trains to book, comma separated",
        ),
+        ToolArgument(
+            name="userConfirmation",
+            type="string",
+            description="Indication of user's desire to book train tickets",
+        ),
    ],
 )

@@ -132,6 +146,11 @@ create_invoice_tool = ToolDefinition(
            type="string",
            description="A description of the item details to be invoiced, inferred from the conversation history.",
        ),
+        ToolArgument(
+            name="userConfirmation",
+            type="string",
+            description="Indication of user's desire to create an invoice",
+        ),
    ],
 )

@@ -278,7 +297,6 @@ financial_get_account_balances = ToolDefinition(
    name="FinCheckAccountBalance",
    description="Get account balance for your accounts. "
    "Returns the account balances of your accounts. ",
-    
    arguments=[
        ToolArgument(
            name="email_address_or_account_ID",
@@ -289,10 +307,9 @@ financial_get_account_balances = ToolDefinition(
 )

 financial_move_money = ToolDefinition(
-    name="FinMoveMoneyOrder",
-    description="Execute a money movement order. "
-    "Returns the status of the order and the account balance of the account money was moved from. ",
-    
+    name="FinMoveMoney",
+    description="Send money from one account to another under the same account ID (e.g. checking to savings). "
+    "Returns the status of the order and the new balances in each account. ",
    arguments=[
        ToolArgument(
            name="email_address_or_account_ID",
@@ -303,16 +320,16 @@ financial_move_money = ToolDefinition(
            name="accounttype",
            type="string",
            description="account type, such as checking or savings",
-        ),        
+        ),
        ToolArgument(
            name="amount",
            type="string",
-            description="amount to move in the order",
+            description="amount to move in the order, in dollars (e.g. 25.50)",
        ),
        ToolArgument(
            name="destinationaccount",
            type="string",
-            description="account number to move the money to",
+            description="account to move the money to (e.g. checking or savings)",
        ),
        ToolArgument(
            name="userConfirmation",
@@ -324,16 +341,14 @@ financial_move_money = ToolDefinition(

 financial_submit_loan_approval = ToolDefinition(
    name="FinCheckAccountSubmitLoanApproval",
-    description="Submit a loan application. "
-    "Returns the loan status. ",
-    
+    description="Submit a loan application. " "Returns the loan status. ",
    arguments=[
        ToolArgument(
            name="email_address_or_account_ID",
            type="string",
            description="email address or account ID of user",
-        ),  
-         ToolArgument(
+        ),
+        ToolArgument(
            name="amount",
            type="string",
            description="amount requested for the loan",
@@ -381,4 +396,4 @@ ecomm_track_package = ToolDefinition(
            description="Indication of user's desire to get package tracking information",
        ),
    ],
-)
+)