Merge pull request #5 from steveandroulakis/validator-and-improvements

Validator and improvements
2026-03-15 05:58:08 +01:00 · 2025-02-03 13:35:02 -08:00
parent 8cf2e891e9 36d4bd5889
commit f97afb4b38
10 changed files with 173 additions and 18 deletions
--- a/README.md
+++ b/README.md
@@ -68,6 +68,7 @@ Note: I found the other (hosted) LLMs to be MUCH more reliable for this use case
 * Requires a Stripe key for the `create_invoice` tool. Set this in the `STRIPE_API_KEY` environment variable in .env
    * It's free to sign up and get a key at [Stripe](https://stripe.com/)
    * If you're lazy go to `tools/create_invoice.py` and replace the `create_invoice` function with the mock `create_invoice_example` that exists in the same file.
 * Requires a key from [Football Data](https://www.football-data.org). Sign up for a free account, then see the 'My Account' page to get your API token. Set `FOOTBALL_DATA_API_KEY` to this value.
 ## Configuring Temporal Connection
@@ -137,3 +138,12 @@ Access the UI at `http://localhost:5173`
 - Continue-as-new shouldn't be a big consideration for this use case (as it would take many conversational turns to trigger). Regardless, I should ensure that it's able to carry the agent state over to the new workflow execution.
 - Perhaps the UI should show when the LLM response is being retried (i.e. activity retry attempt because the LLM provided bad output)
 - Tests would be nice!
 # TODO for this branch
 ## Agent
 - We'll have to figure out which matches are where. No use going to Manchester for a match that isn't there.
 - The use of `###` in prompts I want excluded from the conversation history is a bit of a hack.
 ## Validator function
 - Probably keep data types, but move the activity and workflow code for the demo
 - Probably don't need the validator function if its the result from a tool call or confirmation step
--- a/activities/tool_activities.py
+++ b/activities/tool_activities.py
@@ -1,4 +1,3 @@
 from dataclasses import dataclass
 from temporalio import activity
 from ollama import chat, ChatResponse
 from openai import OpenAI
@@ -11,6 +10,7 @@ import google.generativeai as genai
 import anthropic
 import deepseek
 from dotenv import load_dotenv
 from models.data_types import ValidationInput, ValidationResult, ToolPromptInput
 load_dotenv(override=True)
 print(
@@ -23,13 +23,66 @@ if os.environ.get("LLM_PROVIDER") == "ollama":
    print("Using Ollama (local) model: " + os.environ.get("OLLAMA_MODEL_NAME"))
@dataclass
 class ToolPromptInput:
    prompt: str
    context_instructions: str
 class ToolActivities:
    @activity.defn
    async def validate_llm_prompt(
        self, validation_input: ValidationInput
    ) -> ValidationResult:
        """
        Validates the prompt in the context of the conversation history and agent goal.
        Returns a ValidationResult indicating if the prompt makes sense given the context.
        """
        # Create simple context string describing tools and goals
        tools_description = []
        for tool in validation_input.agent_goal.tools:
            tool_str = f"Tool: {tool.name}\n"
            tool_str += f"Description: {tool.description}\n"
            tool_str += "Arguments: " + ", ".join(
                [f"{arg.name} ({arg.type})" for arg in tool.arguments]
            )
            tools_description.append(tool_str)
        tools_str = "\n".join(tools_description)
        # Convert conversation history to string
        history_str = json.dumps(validation_input.conversation_history, indent=2)
        # Create context instructions
        context_instructions = f"""The agent goal and tools are as follows:
            Description: {validation_input.agent_goal.description}
            Available Tools:
            {tools_str}
            The conversation history to date is:
            {history_str}"""
        # Create validation prompt
        validation_prompt = f"""The user's prompt is: "{validation_input.prompt}"
            Please validate if this prompt makes sense given the agent goal and conversation history.
            If the prompt doesn't make sense toward the goal then validationResult should be true.
            Only return false if the prompt is nonsensical given the goal, tools available, and conversation history.
            Return ONLY a JSON object with the following structure:
                "validationResult": true/false,
                "validationFailedReason": "If validationResult is false, provide a clear explanation to the user 
                about why their request doesn't make sense in the context and what information they should provide instead.
                validationFailedReason should contain JSON in the format
                {{
                    "next": "question",
                    "response": "[your reason here and a response to get the user back on track with the agent goal]"
                }}
                If validationResult is true, return an empty dict {{}}"
            """
        # Call the LLM with the validation prompt
        prompt_input = ToolPromptInput(
            prompt=validation_prompt, context_instructions=context_instructions
        )
        result = self.prompt_llm(prompt_input)
        return ValidationResult(
            validationResult=result.get("validationResult", False),
            validationFailedReason=result.get("validationFailedReason", {}),
        )
    @activity.defn
    def prompt_llm(self, input: ToolPromptInput) -> dict:
        llm_provider = os.environ.get("LLM_PROVIDER", "openai").lower()
--- a/api/main.py
+++ b/api/main.py
@@ -132,3 +132,26 @@ async def end_chat():
        print(e)
        # Workflow not found; return an empty response
        return {}
@app.post("/start-workflow")
 async def start_workflow():
    # Create combined input
    combined_input = CombinedInput(
        tool_params=ToolWorkflowParams(None, None),
        agent_goal=goal_event_flight_invoice,
    )
    workflow_id = "agent-workflow"
    # Start the workflow with the starter prompt from the goal
    await temporal_client.start_workflow(
        ToolWorkflow.run,
        combined_input,
        id=workflow_id,
        task_queue=TEMPORAL_TASK_QUEUE,
        start_signal="user_prompt",
        start_signal_args=["### " + goal_event_flight_invoice.starter_prompt],
    )
    return {"message": f"Workflow started with goal's starter prompt: {goal_event_flight_invoice.starter_prompt}."}
--- a/frontend/src/pages/App.jsx
+++ b/frontend/src/pages/App.jsx
@@ -167,7 +167,7 @@ export default function App() {
        try {
            setError(INITIAL_ERROR_STATE);
            setLoading(true);
-            await apiService.sendMessage("I'd like to travel for an event.");
+            await apiService.startWorkflow();
            setConversation([]);
            setLastMessage(null);
        } catch (err) {
--- a/frontend/src/services/api.js
+++ b/frontend/src/services/api.js
@@ -56,6 +56,26 @@ export const apiService = {
        }
    },
    async startWorkflow() {
        try {
            const res = await fetch(
                `${API_BASE_URL}/start-workflow`,
                { 
                    method: 'POST',
                    headers: {
                        'Content-Type': 'application/json'
                    }
                }
            );
            return handleResponse(res);
        } catch (error) {
            throw new ApiError(
                'Failed to start workflow',
                error.status || 500
            );
        }
    },
    async confirm() {
        try {
            const res = await fetch(`${API_BASE_URL}/confirm`, { 
--- a/models/data_types.py
+++ b/models/data_types.py
@@ -1,5 +1,5 @@
 from dataclasses import dataclass
-from typing import Optional, Deque
+from typing import Optional, Deque, Dict, Any, List, Union, Literal
 from models.tool_definitions import AgentGoal
@@ -13,3 +13,32 @@ class ToolWorkflowParams:
 class CombinedInput:
    tool_params: ToolWorkflowParams
    agent_goal: AgentGoal
 Message = Dict[str, Union[str, Dict[str, Any]]]
 ConversationHistory = Dict[str, List[Message]]
 NextStep = Literal["confirm", "question", "done"]
@dataclass
 class ToolPromptInput:
    prompt: str
    context_instructions: str
@dataclass
 class ValidationInput:
    prompt: str
    conversation_history: ConversationHistory
    agent_goal: AgentGoal
@dataclass
 class ValidationResult:
    validationResult: bool
    validationFailedReason: dict = None
    def __post_init__(self):
        # Initialize empty dict if None
        if self.validationFailedReason is None:
            self.validationFailedReason = {}
--- a/models/tool_definitions.py
+++ b/models/tool_definitions.py
@@ -20,6 +20,7 @@ class ToolDefinition:
 class AgentGoal:
    tools: List[ToolDefinition]
    description: str = "Description of the tools purpose and overall goal"
    starter_prompt: str = "Initial prompt to start the conversation"
    example_conversation_history: str = (
        "Example conversation history to help the AI agent understand the context of the conversation"
    )
--- a/scripts/run_worker.py
+++ b/scripts/run_worker.py
@@ -24,6 +24,7 @@ async def main():
            workflows=[ToolWorkflow],
            activities=[
                activities.prompt_llm,
                activities.validate_llm_prompt,
                dynamic_tool_activity,
            ],
            activity_executor=activity_executor,
--- a/tools/goal_registry.py
+++ b/tools/goal_registry.py
@@ -11,6 +11,7 @@ goal_event_flight_invoice = AgentGoal(
    "1. FindFixtures: Find fixtures for a team in a given month "
    "2. SearchFlights: search for a flight around the event dates "
    "3. CreateInvoice: Create a simple invoice for the cost of that flight ",
    starter_prompt="Welcome me, give me a description of what you can do, then ask me for the details you need to do your job",
    example_conversation_history="\n ".join(
        [
            "user: I'd like to travel to a football match",
--- a/workflows/tool_workflow.py
+++ b/workflows/tool_workflow.py
@@ -1,24 +1,21 @@
 from collections import deque
 from datetime import timedelta
-from typing import Dict, Any, Union, List, Optional, Deque, TypedDict, Literal
+from typing import Dict, Any, Union, List, Optional, Deque, TypedDict
 from temporalio.common import RetryPolicy
 from temporalio import workflow
 from models.data_types import ConversationHistory, NextStep, ValidationInput
 with workflow.unsafe.imports_passed_through():
-    from activities.tool_activities import ToolActivities, ToolPromptInput
+    from activities.tool_activities import ToolActivities
    from prompts.agent_prompt_generators import generate_genai_prompt
-    from models.data_types import CombinedInput, ToolWorkflowParams
+    from models.data_types import CombinedInput, ToolWorkflowParams, ToolPromptInput
 # Constants
 MAX_TURNS_BEFORE_CONTINUE = 250
 TOOL_ACTIVITY_TIMEOUT = timedelta(seconds=20)
-LLM_ACTIVITY_TIMEOUT = timedelta(seconds=60)
+LLM_ACTIVITY_TIMEOUT = timedelta(minutes=30)
 # Type definitions
 Message = Dict[str, Union[str, Dict[str, Any]]]
 ConversationHistory = Dict[str, List[Message]]
 NextStep = Literal["confirm", "question", "done"]
 class ToolData(TypedDict, total=False):
@@ -153,6 +150,26 @@ class ToolWorkflow:
                prompt = self.prompt_queue.popleft()
                if not prompt.startswith("###"):
                    self.add_message("user", prompt)
                    # Validate the prompt before proceeding
                    validation_input = ValidationInput(
                        prompt=prompt,
                        conversation_history=self.conversation_history,
                        agent_goal=agent_goal,
                    )
                    validation_result = await workflow.execute_activity(
                        ToolActivities.validate_llm_prompt,
                        args=[validation_input],
                        schedule_to_close_timeout=LLM_ACTIVITY_TIMEOUT,
                        retry_policy=RetryPolicy(initial_interval=timedelta(seconds=5)),
                    )
                    if not validation_result.validationResult:
                        # Handle validation failure
                        self.add_message("agent", validation_result.validationFailedReason)
                        continue  # Skip to the next iteration
                # Proceed with generating the context and prompt
                context_instructions = generate_genai_prompt(
                    agent_goal, self.conversation_history, self.tool_data