Merge pull request #6 from joshmsmith/main

syncing dev
2026-03-15 14:08:08 +01:00 · 2025-04-10 09:48:29 -04:00
parent 50d3e3d638 1e22f3ee4c
commit 6517b624ee
22 changed files with 499 additions and 229 deletions
--- a/.env.example
+++ b/.env.example
@@ -35,13 +35,17 @@ OPENAI_API_KEY=sk-proj-...
 # Uncomment if using API key (not needed for local dev server)
 # TEMPORAL_API_KEY=abcdef1234567890
-# Set starting goal of agent
+# Set starting goal of agent - if unset default is goal_choose_agent_type
-AGENT_GOAL=goal_choose_agent_type  # (default) 
+AGENT_GOAL=goal_choose_agent_type  # for multi-goal  start
 #AGENT_GOAL=goal_event_flight_invoice # for original goal
 #AGENT_GOAL=goal_match_train_invoice # for replay goal
-#Choose which category(ies) of goals you want to be listed by the Agent - options are system (always included), hr, travel, or all. 
+#Choose which category(ies) of goals you want to be listed by the Agent Goal picker if enabled above
-GOAL_CATEGORIES=hr,travel,fin # default is all
+# - options are system (always included), hr, travel, or all. 
 GOAL_CATEGORIES=hr,travel-flights,travel-trains,fin # default is all
 #GOAL_CATEGORIES=travel-flights
-# Set if the UI should force a user confirmation step or not
+# Set if the workflow should wait for the user to click a confirm button (and if the UI should show the confirm button and tool args)
 SHOW_CONFIRM=True
 # Money Scenarios: 
--- a/.gitignore
+++ b/.gitignore
@@ -32,3 +32,4 @@ coverage.xml
 .idea/
 .env
 *.env
--- a/README.md
+++ b/README.md
@@ -12,6 +12,18 @@ It's really helpful to [watch the demo (5 minute YouTube video)](https://www.you
 There are a lot of AI and Agentic AI tools out there, and more on the way. But why Temporal? Temporal gives this system reliability, state management, a code-first approach that we really like, built-in observability and easy error handling.
 For more, check out [architecture-decisions](./architecture-decisions.md).
 ## What is "Agentic AI"?
 These are the key elements of an agentic framework:
 1. Goals a human can get done, made up of tools that can execute individual steps
 2. The "agent loop" - call LLM, either call tools or prompt human, repeat until goal(s) are done
 3. Support for tool calls that require human input and approval
 4. Use of an LLM to check human input for relevance before calling the 'real' LLM
 5. Use of an LLM to summarize and compact the conversation history
 6. Prompt construction (made of system prompts, conversation history, and tool metadata - sent to the LLM to create user prompts)
 7. Bonus: durable tool execution via Temporal Activities
 For a deeper dive into this, check out the [architecture guide](./architecture.md).
 ## Setup and Configuration
 See [the Setup guide](./setup.md).
@@ -28,9 +40,21 @@ See [the architecture guide](./architecture.md).
 - Perhaps the UI should show when the LLM response is being retried (i.e. activity retry attempt because the LLM provided bad output)
 - Tests would be nice! [See tests](./tests/).
 See [the todo](./todo.md) for more details.
 See [the guide to adding goals and tools](./adding-goals-and-tools.md) for more ways you can add features.
 ## For Temporal SAs
 Check out the [slides](https://docs.google.com/presentation/d/1wUFY4v17vrtv8llreKEBDPLRtZte3FixxBUn0uWy5NU/edit#slide=id.g3333e5deaa9_0_0) here and the enablement guide here (TODO).
 ## Tests
 Running the tests requires `poe` and `pytest_asyncio` to be installed.
    python -m pip install poethepoet
    python -m pip install pytest_asyncio
 Once you have `poe` and `pytest_asyncio` installed you can run:
    poe test
--- a/activities/tool_activities.py
+++ b/activities/tool_activities.py
@@ -11,7 +11,7 @@ import google.generativeai as genai
 import anthropic
 import deepseek
 from dotenv import load_dotenv
-from models.data_types import ValidationInput, ValidationResult, ToolPromptInput
+from models.data_types import EnvLookupOutput, ValidationInput, ValidationResult, ToolPromptInput, EnvLookupInput
 load_dotenv(override=True)
 print(
@@ -50,7 +50,7 @@ class ToolActivities:
            else:
                print("Warning: OPENAI_API_KEY not set but LLM_PROVIDER is 'openai'")
-        if self.llm_provider == "grok":
+        elif self.llm_provider == "grok":
            if os.environ.get("GROK_API_KEY"):
                self.grok_client = OpenAI(api_key=os.environ.get("GROK_API_KEY"), base_url="https://api.x.ai/v1")
                print("Initialized grok client")
@@ -370,7 +370,8 @@ class ToolActivities:
            print("Initialized Anthropic client on demand")
        response = self.anthropic_client.messages.create(
-            model="claude-3-5-sonnet-20241022",  # todo try claude-3-7-sonnet-20250219
+            #model="claude-3-5-sonnet-20241022",  # todo try claude-3-7-sonnet-20250219
            model="claude-3-7-sonnet-20250219",  # todo try claude-3-7-sonnet-20250219
            max_tokens=1024,
            system=input.context_instructions
            + ". The current date is "
@@ -471,6 +472,32 @@ class ToolActivities:
            print(f"Full response: {response_content}")
            raise
    # get env vars for workflow
    @activity.defn
    async def get_wf_env_vars(self, input: EnvLookupInput) -> EnvLookupOutput:
        """Read the workflow-related environment settings and return them.

        Exposed as an activity so the workflow receives these values as an
        activity result (the original note: "so it's deterministic") instead
        of reading the environment itself.

        Args:
            input: carries the name of the show-confirm environment variable
                and the value to fall back on when that variable is unset.

        Returns:
            EnvLookupOutput with:
              * show_confirm - the supplied default when the variable is
                unset; otherwise False only if the value is "false"
                (case-insensitive), True for any other value.
              * multi_goal_mode - True when AGENT_GOAL is unset or equals
                "goal_choose_agent_type" (case-insensitive), else False.
        """
        fallback_confirm = input.show_confirm_default

        raw_confirm = os.getenv(input.show_confirm_env_var_name)
        if raw_confirm is None:
            confirm = fallback_confirm
        else:
            # Anything other than an explicit "false" counts as enabled.
            confirm = raw_confirm.lower() != "false"

        raw_goal = os.getenv("AGENT_GOAL")
        # Unset means the default starting goal, which is the goal picker.
        multi_goal = raw_goal is None or raw_goal.lower() == "goal_choose_agent_type"

        return EnvLookupOutput(show_confirm=confirm, multi_goal_mode=multi_goal)
 def get_current_date_human_readable():
    """
@@ -487,8 +514,6 @@ def get_current_date_human_readable():
 async def dynamic_tool_activity(args: Sequence[RawValue]) -> dict:
    from tools import get_handler
    #  if current_tool == "move_money":
    #     workflow.logger.warning(f"trying for move_money direct")
    tool_name = activity.info().activity_type  # e.g. "FindEvents"
    tool_args = activity.payload_converter().from_payload(args[0].payload, dict)
    activity.logger.info(f"Running dynamic tool '{tool_name}' with args: {tool_args}")
@@ -503,3 +528,5 @@ async def dynamic_tool_activity(args: Sequence[RawValue]) -> dict:
    # Optionally log or augment the result
    activity.logger.info(f"Tool '{tool_name}' result: {result}")
    return result
--- a/adding-goals-and-tools.md
+++ b/adding-goals-and-tools.md
@@ -1,16 +1,16 @@
-## Customizing the Agent
+# Customizing the Agent
 The agent is set up to allow for multiple goals and to switch back to choosing a new goal at the end of every successful goal. A goal is made up of a list of tools that the agent will guide the user through. 
 It may be helpful to review the [architecture](./architecture.md) for a guide and definition of goals, tools, etc.
-### Adding a New Goal Category
+## Adding a New Goal Category
 Goal Categories let you pick which groups of goals to show. Set via an .env setting, GOAL_CATEGORIES.
 1. Pick a unique one that has some business meaning
 2. Use it in your .env file
 3. Add to [.env.example](./.env.example)
 4. Use it in your Goal definition, see below.
-### Adding a Goal
+## Adding a Goal
 1.  Open [/tools/goal_registry.py](tools/goal_registry.py) - this file contains descriptions of goals and the tools used to achieve them
 2. Pick a name for your goal! (such as "goal_hr_schedule_pto")
 3. Fill out the required elements:
@@ -19,14 +19,13 @@ Goal Categories lets you pick which groups of goals to show. Set via an .env set
 - `category_tag`: category for the goal
 - `agent_friendly_description`: user-facing description of what the agent/chatbot does
 - `tools`: the list of tools the goal will walk the user through. These will be defined in the [tools/tool_registry.py](tools/tool_registry.py) and should be defined in list form as tool_registry.[name of tool]
- Important! The last tool listed must be `list_agents_tool`. This allows the chatbot to guide the user back to choosing from the list of available goals once a goal is complete.<br />
+
 Example:
 ```
 tools=[
    tool_registry.current_pto_tool,
    tool_registry.future_pto_calc_tool,
    tool_registry.book_pto_tool,
    tool_registry.list_agents_tool,
 ]
 ```
 - `description`: LLM-facing description of the goal that lists the tools by name and purpose.
@@ -34,9 +33,9 @@ tools=[
 - `example_conversation_history`: LLM-facing sample conversation/interaction regarding the goal. See the existing goals for how to structure this.
 4. Add your new goal to the `goal_list` at the bottom using `goal_list.append(your_super_sweet_new_goal)`
-### Adding Tools
+## Adding Tools
-#### Notes
+### Optional Tools
 Tools can be optional - you can indicate this in the tool listing of goal description (see above section re: goal registry) by adding something like, "This step is optional and can be skipped by moving to the next tool." Here is an example from an older iteration of the `goal_hr_schedule_pto` goal, when it was going to have an optional step to check for existing calendar conflicts:
 ```
@@ -47,62 +46,42 @@ description="Help the user gather args for these tools in order: "
    "4. BookPTO: Book PTO "
 ```
-#### Add to Tool Registry
+Tools should generally return meaningful information and be generally ‘failsafe’ in returning a useful result based on input.
 If you're doing a local data approach like those in [./tools/data/](./tools/data/), it's good to document how they can be set up to get a good result in the tool-specific [setup](./setup.md).
 ### Add to Tool Registry
 1.  Open [/tools/tool_registry.py](tools/tool_registry.py) - this file contains mapping of tool names to tool definitions (so the AI understands how to use them)
 2. Define the tool
 - `name`: name of the tool - this is the name as defined in the goal description list of tools. The name should be (sort of) the same as the tool name given in the goal description. So, if the description lists "CurrentPTO" as a tool, the name here should be `current_pto_tool`.
 - `description`: LLM-facing description of tool
 - `arguments`: These are the _input_ arguments to the tool. Each input argument should be defined as a [ToolArgument](./models/tool_definitions.py). Tools don't have to have arguments but the arguments list has to be declared. If the tool you're creating doesn't have inputs, define arguments as `arguments=[]`
-#### Create Each Tool
+### Create Each Tool
 - The tools themselves are defined in their own files in `/tools` - you can add a subfolder to organize them, see the hr tools for an example.
 - The file name and function name will be the same as each other and should also be the same as the name of the tool, without "tool" - so `current_pto_tool` would be `current_pto.py` with a function named `current_pto` within it.
 - The function should have `args: dict` as the input and also return a `dict`
 - The return dict should match the output format you specified in the goal's `example_conversation_history`
 - tools are where the user input+model output becomes deterministic. Add validation here to make sure what the system is doing is valid and acceptable
-#### Add to `tools/__init__.py` and the tool get_handler()
+### Add to `tools/__init__.py` and the tool get_handler()
 - In [tools/__init__.py](./tools/__init__.py), add an import statement for each new tool as well as an applicable return statement in `get_handler`. The tool name here should match the tool name as described in the goal's `description` field.
 Example:
 ```
 if tool_name == "CurrentPTO":
    return current_pto
 ```
-
+## Tool Confirmation
-### Existing Travel Goals
+There are three ways to manage confirmation of tool runs:
-
+1. Arguments confirmation box - confirm tool arguments and execution with a button click
-The agent can be configured to pursue different goals using the `AGENT_GOAL` environment variable in your `.env` file.
+   -  Can be disabled by env setting: `SHOW_CONFIRM=FALSE`
-
+2. Soft prompt confirmation via asking the model to prompt for confirmation: “Are you ready to be invoiced for the total cost of the train tickets?” in the [goal_registry](./tools/goal_registry.py).
-#### Goal: Find an event in Australia / New Zealand, book flights to it and invoice the user for the cost
+3. Hard confirmation requirement as a tool argument. See for example the PTO Scheduling Tool:
- `AGENT_GOAL=goal_event_flight_invoice` (default) - Helps users find events, book flights, and arrange train travel with invoice generation
+```Python
-    - This is the scenario in the [original video](https://www.youtube.com/watch?v=GEXllEH2XiQ)
+        ToolArgument(
-
+            name="userConfirmation",
-#### Goal: Find a Premier League match, book train tickets to it and invoice the user for the cost
+            type="string",
- `AGENT_GOAL=goal_match_train_invoice` - Focuses on Premier League match attendance with train booking and invoice generation
+            description="Indication of user's desire to book PTO",
-    - This is a new goal that is part of the [Replay 2025 talk](https://www.youtube.com/watch?v=YDxAWrIBQNE).
+        ),
-
+```
-If not specified, the agent defaults to all goals. Each goal, including these, comes with its own set of tools and conversation flows designed for specific use cases. You can examine `tools/goal_registry.py` to see the detailed configuration of each goal.
+If you really want to wait for user confirmation and record it on the workflow (as a Signal), rather than rely on the LLM to probably get it right, use option #3.
-
+I recommend exploring all three. For a demo, I would decide if you want the Arguments confirmation in the UI, and if not I'd generally go with option #2 but use #3 for tools that make business sense to confirm, e.g. those tools that take action/write data.
 See the next section for tool configuration for these goals.
 #### Configuring Travel Goal Tools
 ##### Agent Goal: goal_event_flight_invoice (default)
 * The agent uses a mock function to search for events. This has zero configuration.
 * By default the agent uses a mock function to search for flights.
    * If you want to use the real flights API, go to `tools/search_flights.py` and replace the `search_flights` function with `search_flights_real_api` that exists in the same file.
    * It's free to sign up at [RapidAPI](https://rapidapi.com/apiheya/api/sky-scrapper)
    * This api might be slow to respond, so you may want to increase the start to close timeout, `TOOL_ACTIVITY_START_TO_CLOSE_TIMEOUT` in `workflows/workflow_helpers.py`
 * Requires a Stripe key for the `create_invoice` tool. Set this in the `STRIPE_API_KEY` environment variable in .env
    * It's free to sign up and get a key at [Stripe](https://stripe.com/)
    * If you're lazy go to `tools/create_invoice.py` and replace the `create_invoice` function with the mock `create_invoice_example` that exists in the same file.
 ##### Agent Goal: goal_match_train_invoice
 * Finding a match requires a key from [Football Data](https://www.football-data.org). Sign up for a free account, then see the 'My Account' page to get your API token. Set `FOOTBALL_DATA_API_KEY` to this value.
    * If you're lazy go to `tools/search_fixtures.py` and replace the `search_fixtures` function with the mock `search_fixtures_example` that exists in the same file.
 * We use a mock function to search for trains. Start the train API server to use the real API: `python thirdparty/train_api.py`
 * * The train activity is 'enterprise' so it's written in C# and requires a .NET runtime. See the [.NET backend](#net-(enterprise)-backend) section for details on running it.
 * Requires a Stripe key for the `create_invoice` tool. Set this in the `STRIPE_API_KEY` environment variable in .env
    * It's free to sign up and get a key at [Stripe](https://stripe.com/)
    * If you're lazy go to `tools/create_invoice.py` and replace the `create_invoice` function with the mock `create_invoice_example` that exists in the same file.
--- a/assets/temporal-agentic-ai-slides.pdf
+++ b/assets/temporal-agentic-ai-slides.pdf
--- a/models/data_types.py
+++ b/models/data_types.py
@@ -42,3 +42,13 @@ class ValidationResult:
        # Initialize empty dict if None
        if self.validationFailedReason is None:
            self.validationFailedReason = {}
@dataclass
 class EnvLookupInput:
    """Input for the environment-lookup activity (get_wf_env_vars)."""
    # Name of the environment variable that holds the show-confirm setting.
    show_confirm_env_var_name: str
    # Value to use for show_confirm when that environment variable is unset.
    show_confirm_default: bool
@dataclass
 class EnvLookupOutput:
    """Result of the environment-lookup activity (get_wf_env_vars)."""
    # Resolved show-confirm setting (env value, or the caller's default if unset).
    show_confirm: bool
    # True when AGENT_GOAL is unset or is "goal_choose_agent_type".
    multi_goal_mode: bool
--- a/prompts/agent_prompt_generators.py
+++ b/prompts/agent_prompt_generators.py
@@ -2,15 +2,17 @@ from models.tool_definitions import AgentGoal
 from typing import Optional
 import json
 # Module-level multi-goal flag. None means "not set yet"; the first call to
 # set_multi_goal_mode_if_unset() replaces it with a real bool.
 MULTI_GOAL_MODE: Optional[bool] = None
 def generate_genai_prompt(
-    agent_goal: AgentGoal, conversation_history: str, raw_json: Optional[str] = None
+    agent_goal: AgentGoal, conversation_history: str, multi_goal_mode:bool, raw_json: Optional[str] = None
 ) -> str:
    """
    Generates a concise prompt for producing or validating JSON instructions
    with the provided tools and conversation history.
    """
    prompt_lines = []
    set_multi_goal_mode_if_unset(multi_goal_mode)
    # Intro / Role
    prompt_lines.append(
@@ -81,7 +83,7 @@ def generate_genai_prompt(
        "1) If any required argument is missing, set next='question' and ask the user.\n"
        "2) If all required arguments are known, set next='confirm' and specify the tool.\n"
        "   The user will confirm before the tool is run.\n"
-        "3) If no more tools are needed (user_confirmed_tool_run has been run for all), set next='confirm' and tool='ListAgents'.\n"
+        f"3) {generate_toolchain_complete_guidance()}\n"
        "4) response should be short and user-friendly.\n"
    )
@@ -126,7 +128,8 @@ def generate_tool_completion_prompt(current_tool: str, dynamic_result: dict) ->
        '{"next": "<question|confirm|pick-new-goal|done>", "tool": "<tool_name or null>", "args": {"<arg1>": "<value1 or null>", "<arg2>": "<value2 or null>}, "response": "<plain text (can include \\n line breaks)>"}'
        "ONLY return those json keys (next, tool, args, response), nothing else. "
        'Next should be "question" if the tool is not the last one in the sequence. '
-        'Next should only be "pick-new-goal" if all tools have been run (use the system prompt to figure that out).'
+        'Next should be "done" if the user is asking to be done with the chat. '
        f"{generate_pick_new_goal_guidance()}"
    )
 def generate_missing_args_prompt(current_tool: str, tool_data: dict, missing_args: list[str]) -> str:
@@ -146,3 +149,59 @@ def generate_missing_args_prompt(current_tool: str, tool_data: dict, missing_arg
        f"and following missing arguments for tool {current_tool}: {missing_args}. "
        "Only provide a valid JSON response without any comments or metadata."
    )
 def set_multi_goal_mode_if_unset(mode: bool) -> None:
    """
    Store the multi-goal mode in the module-level flag, first caller wins.

    Once MULTI_GOAL_MODE holds a non-None value, later calls leave it
    untouched regardless of the argument.

    Args:
        mode: True if running in multi-goal mode, False if not
    Returns:
        None
    """
    global MULTI_GOAL_MODE
    if MULTI_GOAL_MODE is not None:
        # Already initialised by an earlier call; keep the existing value.
        return
    MULTI_GOAL_MODE = mode
 def is_multi_goal_mode() -> bool:
    """
    Single place to ask whether we're in multi-goal mode.

    Returns the module-level MULTI_GOAL_MODE flag as-is; it is still None
    (falsy) if set_multi_goal_mode_if_unset() has not stored a value yet.

    Returns:
        bool: True if in multi-goal mode, false if not
    """
    current_mode = MULTI_GOAL_MODE
    return current_mode
 def generate_pick_new_goal_guidance() -> str:
    """
    Build the LLM instruction covering when "pick-new-goal" may be used.

    In multi-goal mode the LLM is told when "pick-new-goal" is allowed;
    otherwise it is told never to use it.

    Returns:
        str: the guidance sentence to embed in the prompt
    """
    if not is_multi_goal_mode():
        return 'Next should never be "pick-new-goal".'
    return 'Next should only be "pick-new-goal" if all tools have been run (use the system prompt to figure that out) or the user explicitly requested to pick a new goal.'
 def generate_toolchain_complete_guidance() -> str:
    """
    Build the LLM instruction for what to do once every tool has been run.

    In multi-goal mode the LLM is directed to confirm the ListAgents tool;
    otherwise it is directed to finish with next='done'.

    Returns:
        str: the guidance sentence to embed in the prompt
    """
    if not is_multi_goal_mode():
        return "If no more tools are needed (user_confirmed_tool_run has been run for all), set next='done' and tool=''."
    return "If no more tools are needed (user_confirmed_tool_run has been run for all), set next='confirm' and tool='ListAgents'."
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -15,6 +15,12 @@ packages = [
 [tool.poetry.urls]
 "Bug Tracker" = "https://github.com/temporalio/samples-python/issues"
 [tool.poe.tasks]
 format = [{cmd = "black ."}, {cmd = "isort ."}]
 lint = [{cmd = "black --check ."}, {cmd = "isort --check-only ."}, {ref = "lint-types" }]
 lint-types = "mypy --check-untyped-defs --namespace-packages ."
 test = "pytest"
 [tool.poetry.dependencies]
 python = ">=3.10,<4.0"
 temporalio = "^1.8.0"
--- a/scripts/find_events_test.py
+++ b/scripts/find_events_test.py
@@ -1,8 +1,8 @@
-from tools.search_events import find_events
+from tools.search_flights import search_flights
 import json
 # Example usage
 if __name__ == "__main__":
    search_args = {"city": "Sydney", "month": "July"}
-    results = find_events(search_args)
+    results = search_flights(search_args)
    print(json.dumps(results, indent=2))
--- a/scripts/run_worker.py
+++ b/scripts/run_worker.py
@@ -62,6 +62,7 @@ async def main():
            activities=[
                activities.agent_validatePrompt,
                activities.agent_toolPlanner,
                activities.get_wf_env_vars,
                dynamic_tool_activity,
            ],
            activity_executor=activity_executor,
--- a/setup.md
+++ b/setup.md
@@ -8,48 +8,22 @@ cp .env.example .env
 ```
 Then add API keys, configuration, as desired.
-If you want to show confirmations/enable the debugging UI, set
+
 If you want to show confirmations/enable the debugging UI that shows tool args, set
 ```bash
 SHOW_CONFIRM=True
 ```
 ### Agent Goal Configuration
-The agent can be configured to pursue different goals using the `AGENT_GOAL` environment variable in your `.env` file.
+The agent can be configured to pursue different goals using the `AGENT_GOAL` environment variable in your `.env` file. If unset, default is `goal_choose_agent_type`. 
-#### Goal: Find an event in Australia / New Zealand, book flights to it and invoice the user for the cost
+If the first goal is `goal_choose_agent_type` the agent will support multiple goals using goal categories defined by `GOAL_CATEGORIES` in your .env file. If unset, default is all.
- `AGENT_GOAL=goal_event_flight_invoice` (default) - Helps users find events, book flights, and arrange train travel with invoice generation
+```bash
-    - This is the scenario in the video above
+GOAL_CATEGORIES=hr,travel-flights,travel-trains,fin
 ```
-#### Goal: Find a Premier League match, book train tickets to it and invoice the user for the cost
+See the section Goal-Specific Tool Configuration below for tool configuration for specific goals.
 - `AGENT_GOAL=goal_match_train_invoice` - Focuses on Premier League match attendance with train booking and invoice generation
    - This is a new goal that is part of an upcoming conference talk
 If not specified, the agent defaults to `goal_event_flight_invoice`. Each goal comes with its own set of tools and conversation flows designed for specific use cases. You can examine `tools/goal_registry.py` to see the detailed configuration of each goal.
 See the next section for tool configuration for each goal.
 ### Tool Configuration
 #### Agent Goal: goal_event_flight_invoice (default)
 * The agent uses a mock function to search for events. This has zero configuration.
 * By default the agent uses a mock function to search for flights.
    * If you want to use the real flights API, go to `tools/search_flights.py` and replace the `search_flights` function with `search_flights_real_api` that exists in the same file.
    * It's free to sign up at [RapidAPI](https://rapidapi.com/apiheya/api/sky-scrapper)
    * This api might be slow to respond, so you may want to increase the start to close timeout, `TOOL_ACTIVITY_START_TO_CLOSE_TIMEOUT` in `workflows/workflow_helpers.py`
 * Requires a Stripe key for the `create_invoice` tool. Set this in the `STRIPE_API_KEY` environment variable in .env
    * It's free to sign up and get a key at [Stripe](https://stripe.com/)
    * If you're lazy go to `tools/create_invoice.py` and replace the `create_invoice` function with the mock `create_invoice_example` that exists in the same file.
 #### Agent Goal: goal_match_train_invoice
 * Finding a match requires a key from [Football Data](https://www.football-data.org). Sign up for a free account, then see the 'My Account' page to get your API token. Set `FOOTBALL_DATA_API_KEY` to this value.
    * If you're lazy go to `tools/search_fixtures.py` and replace the `search_fixtures` function with the mock `search_fixtures_example` that exists in the same file.
 * We use a mock function to search for trains. Start the train API server to use the real API: `python thirdparty/train_api.py`
 * * The train activity is 'enterprise' so it's written in C# and requires a .NET runtime. See the [.NET backend](#net-(enterprise)-backend) section for details on running it.
 * Requires a Stripe key for the `create_invoice` tool. Set this in the `STRIPE_API_KEY` environment variable in .env
    * It's free to sign up and get a key at [Stripe](https://stripe.com/)
    * If you're lazy go to `tools/create_invoice.py` and replace the `create_invoice` function with the mock `create_invoice_example` that exists in the same file.
 ### LLM Provider Configuration
@@ -154,7 +128,40 @@ npx vite
 ```
 Access the UI at `http://localhost:5173`
-### Python Search Trains API
+
 ## Goal-Specific Tool Configuration
 Here is configuration guidance for specific goals. Travel and financial goals have configuration & setup as below.
 ### Goal: Find an event in Australia / New Zealand, book flights to it and invoice the user for the cost
 - `AGENT_GOAL=goal_event_flight_invoice` - Helps users find events, book flights, and arrange train travel with invoice generation
    - This is the scenario in the [original video](https://www.youtube.com/watch?v=GEXllEH2XiQ)
 #### Configuring Agent Goal: goal_event_flight_invoice 
 * The agent uses a mock function to search for events. This has zero configuration.
 * By default the agent uses a mock function to search for flights.
    * If you want to use the real flights API, go to `tools/search_flights.py` and replace the `search_flights` function with `search_flights_real_api` that exists in the same file.
    * It's free to sign up at [RapidAPI](https://rapidapi.com/apiheya/api/sky-scrapper)
    * This api might be slow to respond, so you may want to increase the start to close timeout, `TOOL_ACTIVITY_START_TO_CLOSE_TIMEOUT` in `workflows/workflow_helpers.py`
 * Requires a Stripe key for the `create_invoice` tool. Set this in the `STRIPE_API_KEY` environment variable in .env
    * It's free to sign up and get a key at [Stripe](https://stripe.com/)
        * Set permissions for read-write on: `Credit Notes, Invoices, Customers and Customer Sessions`
    * If you're lazy go to `tools/create_invoice.py` and replace the `create_invoice` function with the mock `create_invoice_example` that exists in the same file.
 ### Goal: Find a Premier League match, book train tickets to it and invoice the user for the cost (Replay 2025 Keynote)
 - `AGENT_GOAL=goal_match_train_invoice` - Focuses on Premier League match attendance with train booking and invoice generation
    - This goal was part of [Temporal's Replay 2025 conference keynote demo](https://www.youtube.com/watch?v=YDxAWrIBQNE)
    - Note, there is failure built in to this demo (the train booking step) to show how the agent can handle failures and retry. See Tool Configuration below for details.
 #### Configuring Agent Goal: goal_match_train_invoice 
 NOTE: This goal was developed for an on-stage demo and has failure (and its resolution) built in to show how the agent can handle failures and retry.
 * Finding a match requires a key from [Football Data](https://www.football-data.org). Sign up for a free account, then see the 'My Account' page to get your API token. Set `FOOTBALL_DATA_API_KEY` to this value.
    * If you're lazy go to `tools/search_fixtures.py` and replace the `search_fixtures` function with the mock `search_fixtures_example` that exists in the same file.
 * We use a mock function to search for trains. Start the train API server to use the real API: `python thirdparty/train_api.py`
 * * The train activity is 'enterprise' so it's written in C# and requires a .NET runtime. See the [.NET backend](#net-(enterprise)-backend) section for details on running it.
 * Requires a Stripe key for the `create_invoice` tool. Set this in the `STRIPE_API_KEY` environment variable in .env
    * It's free to sign up and get a key at [Stripe](https://stripe.com/)
    * If you're lazy go to `tools/create_invoice.py` and replace the `create_invoice` function with the mock `create_invoice_example` that exists in the same file.
 ##### Python Search Trains API
 > Agent Goal: goal_match_train_invoice only
 Required to search and book trains!
@@ -165,9 +172,18 @@ poetry run python thirdparty/train_api.py
 # http://localhost:8080/api/search?from=london&to=liverpool&outbound_time=2025-04-18T09:00:00&inbound_time=2025-04-20T09:00:00
 ```
-### .NET (enterprise) Backend ;)
+ ##### Python Train Legacy Worker
-> Agent Goal: goal_match_train_invoice only
+ > Agent Goal: goal_match_train_invoice only
 These are Python activities that fail (raise NotImplemented) to show how Temporal handles a failure. You can run these activities with:
 ```bash
 poetry run python scripts/run_legacy_worker.py 
 ```
 The activity will fail and be retried infinitely. To rescue the activity (and its corresponding workflows), kill the worker and run the .NET one in the section below.
 ##### .NET (enterprise) Worker ;)
 We have activities written in C# to call the train APIs.
 ```bash
 cd enterprise
@@ -176,14 +192,20 @@ dotnet run
 ```
 If you're running your train API above on a different host/port then change the API URL in `Program.cs`. Otherwise, be sure to run it using `python thirdparty/train_api.py`.
-### Money Movement Scenario
+#### Goals: FIN/Money Movement
-This scenario _can_ initiate a secondary workflow to move money. Check out [this repo](https://github.com/temporal-sa/temporal-money-transfer-java) - you'll need to get the worker running and connected to the same account as the agentic worker. 
+Make sure you have the mock users you want (such as yourself) in [the account mock data file](./tools/data/customer_account_data.json).
 - `AGENT_GOAL=goal_fin_move_money` - This scenario _can_ initiate a secondary workflow to move money. Check out [this repo](https://github.com/temporal-sa/temporal-money-transfer-java) - you'll need to get the worker running and connected to the same account as the agentic worker. 
 By default it will _not_ make a real workflow, it'll just fake it. If you get the worker running and want to start a workflow, in your [.env](./.env):
 ```bash
 FIN_START_REAL_WORKFLOW=FALSE #set this to true to start a real workflow
 ```
-## Customizing the Agent
+#### Goals: HR/PTO
 Make sure you have the mock users you want (such as yourself) in [the PTO mock data file](./tools/data/employee_pto_data.json).
 ## Customizing the Agent Further
 - `tool_registry.py` contains the mapping of tool names to tool definitions (so the AI understands how to use them)
 - `goal_registry.py` contains descriptions of goals and the tools used to achieve them
 - The tools themselves are defined in their own files in `/tools`
--- a/tests/init.py
+++ b/tests/init.py
--- a/tests/agent_goal_workflow_test.py
+++ b/tests/agent_goal_workflow_test.py
@@ -1,55 +0,0 @@
 import asyncio
 from temporalio.client import Client, WorkflowExecutionStatus
 from temporalio.worker import Worker
 from temporalio.testing import TestWorkflowEnvironment
 from api.main import get_initial_agent_goal
 from models.data_types import AgentGoalWorkflowParams, CombinedInput
 from workflows import AgentGoalWorkflow
 from activities.tool_activities import ToolActivities, dynamic_tool_activity
 async def asyncSetUp(self):
    """Create a local test workflow environment and store it on ``self.env``.

    NOTE(review): this is a module-level function that takes ``self``; it looks
    like it was lifted from a unittest-style test class -- confirm how (or
    whether) it is ever invoked.
    """
    # Set up the test environment
    self.env = await TestWorkflowEnvironment.create_local()
 async def asyncTearDown(self):
    """Shut down the environment stored on ``self.env``.

    NOTE(review): like ``asyncSetUp`` this takes ``self`` but is defined at
    module level -- confirm it is actually called by anything.
    """
    # Clean up after tests
    await self.env.shutdown()
 async def test_workflow_success(client: Client):
    """Start AgentGoalWorkflow on a worker and exercise it with signals.

    The signal names and expected result below come from a greeting sample and
    are flagged "todo fix signals" in the body.

    NOTE(review): ``combined_input`` is built but never passed to
    ``start_workflow``, and ``workflow_id`` is assigned twice with the same
    value.
    """
    # Register the workflow and activity
    # self.env.register_workflow(AgentGoalWorkflow)
    # self.env.register_activity(ToolActivities.agent_validatePrompt)
    # self.env.register_activity(ToolActivities.agent_toolPlanner)
    # self.env.register_activity(dynamic_tool_activity)
    task_queue_name = "agent-ai-workflow"
    workflow_id = "agent-workflow"
    initial_agent_goal = get_initial_agent_goal()
    # Create combined input
    combined_input = CombinedInput(
        tool_params=AgentGoalWorkflowParams(None, None),
        agent_goal=initial_agent_goal,
    )
    workflow_id = "agent-workflow"
    async with Worker(client, task_queue=task_queue_name, workflows=[AgentGoalWorkflow], activities=[ToolActivities.agent_validatePrompt, ToolActivities.agent_toolPlanner, dynamic_tool_activity]):
        # The workflow is started without any input argument here.
        handle = await client.start_workflow(
            AgentGoalWorkflow.run, id=workflow_id, task_queue=task_queue_name
        )
        # todo fix signals
        await handle.signal(AgentGoalWorkflow.submit_greeting, "user1")
        await handle.signal(AgentGoalWorkflow.submit_greeting, "user2")
        assert WorkflowExecutionStatus.RUNNING == (await handle.describe()).status
        await handle.signal(AgentGoalWorkflow.exit)
        assert ["Hello, user1", "Hello, user2"] == await handle.result()
        assert WorkflowExecutionStatus.COMPLETED == (await handle.describe()).status
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -0,0 +1,55 @@
 import asyncio
 import multiprocessing
 import sys
 from typing import AsyncGenerator
 import pytest
 import pytest_asyncio
 from temporalio.client import Client
 from temporalio.testing import WorkflowEnvironment
 # Due to https://github.com/python/cpython/issues/77906, multiprocessing on
 # macOS starting with Python 3.8 has changed from "fork" to "spawn". For
 # pre-3.8, we are changing it for them.
 if sys.version_info < (3, 8) and sys.platform.startswith("darwin"):
    multiprocessing.set_start_method("spawn", True)
 def pytest_addoption(parser):
    """Register the ``--workflow-environment`` command-line option.

    The option defaults to ``"local"``; its help text lists the accepted
    values.
    """
    option_kwargs = {
        "default": "local",
        "help": "Which workflow environment to use ('local', 'time-skipping', or target to existing server)",
    }
    parser.addoption("--workflow-environment", **option_kwargs)
@pytest.fixture(scope="session")
 def event_loop():
    """Provide a single asyncio event loop for the whole test session.

    Yields a freshly created loop and closes it once the session is over.
    """
    # See https://github.com/pytest-dev/pytest-asyncio/issues/68
    # See https://github.com/pytest-dev/pytest-asyncio/issues/257
    # Also need ProactorEventLoop on older versions of Python with Windows so
    # that asyncio subprocess works properly
    if sys.version_info < (3, 8) and sys.platform == "win32":
        loop = asyncio.ProactorEventLoop()
    else:
        # Everywhere else, ask the active event loop policy for a new loop.
        loop = asyncio.get_event_loop_policy().new_event_loop()
    yield loop
    loop.close()
@pytest_asyncio.fixture(scope="session")
 async def env(request) -> AsyncGenerator[WorkflowEnvironment, None]:
    """Provide one WorkflowEnvironment for the whole test session.

    The ``--workflow-environment`` option picks the backend: ``"local"`` starts
    a local environment, ``"time-skipping"`` starts a time-skipping one, and
    any other value is passed to ``Client.connect`` as the target of an
    existing server. The environment is shut down after the fixture's
    consumers are done.
    """
    env_type = request.config.getoption("--workflow-environment")
    if env_type == "local":
        env = await WorkflowEnvironment.start_local()
    elif env_type == "time-skipping":
        env = await WorkflowEnvironment.start_time_skipping()
    else:
        # Anything else is treated as the address of an already-running server.
        env = WorkflowEnvironment.from_client(await Client.connect(env_type))
    yield env
    await env.shutdown()
@pytest_asyncio.fixture
 async def client(env: WorkflowEnvironment) -> Client:
    """Return the client exposed by the ``env`` fixture's workflow environment."""
    return env.client
--- a/tests/workflowtests/agent_goal_workflow_test.py
+++ b/tests/workflowtests/agent_goal_workflow_test.py
@@ -0,0 +1,80 @@
 from temporalio.client import Client, WorkflowExecutionStatus
 from temporalio.worker import Worker
 import concurrent.futures
 from temporalio.testing import WorkflowEnvironment
 from api.main import get_initial_agent_goal
 from models.data_types import AgentGoalWorkflowParams, CombinedInput
 from workflows.agent_goal_workflow import AgentGoalWorkflow
 from activities.tool_activities import ToolActivities, dynamic_tool_activity
 from unittest.mock import patch
 from dotenv import load_dotenv
 import os
 from contextlib import contextmanager
@contextmanager
def my_context():
    """Placeholder context manager that wraps a test body with setup/cleanup.

    Prints "Setup" on entry, yields the string ``"some_value"`` to the ``with``
    block, and prints "Cleanup" on exit.
    """
    print("Setup")
    try:
        yield "some_value"  # Value assigned to 'as' variable
    finally:
        # Run cleanup even when the with-body raises (e.g. a failed assert in
        # a test); without try/finally the exception would skip this line.
        print("Cleanup")
 async def test_flight_booking(client: Client):
    """Drive AgentGoalWorkflow through a short scripted chat on a live worker.

    Starts a Worker on the "agent-ai-workflow" task queue, starts the workflow
    with a "user_prompt" start signal carrying "Hello!", then sends two more
    user prompts and waits for the workflow result.

    The only assertion is that the workflow is RUNNING after the second prompt;
    the final result is awaited but not yet checked (see the todos below).

    NOTE(review): the activities registered here are the project's real
    ToolActivities, so this presumably needs whatever credentials they use --
    confirm before running in CI.
    """
    #load_dotenv("test_flights_single.env")
    with my_context() as value:
        print(f"Working with {value}")
        # Create the test environment
        #env = await WorkflowEnvironment.start_local()
        #client = env.client
        task_queue_name = "agent-ai-workflow"
        workflow_id = "agent-workflow"
        with concurrent.futures.ThreadPoolExecutor(max_workers=100) as activity_executor:        
            worker = Worker(
                client, 
                task_queue=task_queue_name,
                workflows=[AgentGoalWorkflow],
                activities=[ToolActivities.agent_validatePrompt, ToolActivities.agent_toolPlanner, ToolActivities.get_wf_env_vars, dynamic_tool_activity],
                activity_executor=activity_executor,
            )
            async with worker:                 
                initial_agent_goal = get_initial_agent_goal()
                # Create combined input
                combined_input = CombinedInput(
                    tool_params=AgentGoalWorkflowParams(None, None),
                    agent_goal=initial_agent_goal,
                )
                prompt="Hello!"
                #async with Worker(client, task_queue=task_queue_name, workflows=[AgentGoalWorkflow], activities=[ToolActivities.agent_validatePrompt, ToolActivities.agent_toolPlanner, dynamic_tool_activity]):
                # todo set goal categories for scenarios
                # Start the workflow and deliver the first prompt in the same call.
                handle = await client.start_workflow(
                    AgentGoalWorkflow.run,
                    combined_input,
                    id=workflow_id, 
                    task_queue=task_queue_name,
                    start_signal="user_prompt",
                    start_signal_args=[prompt],
                )
                # todo send signals to simulate user input
                # await handle.signal(AgentGoalWorkflow.user_prompt, "book flights") # for multi-goal
                await handle.signal(AgentGoalWorkflow.user_prompt, "sydney in september")
                assert WorkflowExecutionStatus.RUNNING == (await handle.describe()).status
                #assert ["Hello, user1", "Hello, user2"] == await handle.result()
                await handle.signal(AgentGoalWorkflow.user_prompt, "I'm all set, end conversation")
                #assert WorkflowExecutionStatus.COMPLETED == (await handle.describe()).status
                # Blocks until the workflow returns; the value is not asserted on yet.
                result = await handle.result()
                #todo dump workflow history for analysis optional
                #todo assert result is good
--- a/todo.md
+++ b/todo.md
@@ -1,27 +1,27 @@
 # todo list
-[ ] try claude-3-7-sonnet-20250219, see [tool_activities.py](./activities/tool_activities.py) <br />
+[ ] goal change management tweaks <br />
-[x] make agent respond to name of goals and not just numbers <br />
+  - [x] maybe make the choose_Agent_goal tag not be system/not always included? <br />
-[x] josh to do fintech scenarios <br />
+  - [x] try taking out list-agents as a tool because agent_prompt_generators may do it for you <br />
-[ ] expand [tests](./tests/agent_goal_workflow_test.py)<br />
+  - [x] make goal selection not be a system tool but be an option in .env, see how that works, includes taking it out of the goal/toolset for all goals <br />
  - [x] test single-goal <br />
  - [x] test claude and grok<br />
  - [x] document in sample env and docs how to control <br />
-[ ] fintech goals <br />
+[ ] expand [tests](./tests/agent_goal_workflow_test.py)<br />
 [x] try claude-3-7-sonnet-20250219, see [tool_activities.py](./activities/tool_activities.py) <br />
 [x] test Grok with changes
 [ ] adding fintech goals <br />
 - Fraud Detection and Prevention - The AI monitors transactions across accounts, flagging suspicious activities (e.g., unusual spending patterns or login attempts) and autonomously freezing accounts or notifying customers and compliance teams.<br />
 - Personalized Financial Advice - An AI agent analyzes a customer’s financial data (e.g., income, spending habits, savings, investments) and provides tailored advice, such as budgeting tips, investment options, or debt repayment strategies.<br />
 - Portfolio Management and Rebalancing - The AI monitors a customer’s investment portfolio, rebalancing it automatically based on market trends, risk tolerance, and financial goals (e.g., shifting assets between stocks, bonds, or crypto).<br />
-[x] money movement - start money transfer <br />
+
 [x] todo use env vars to connect to local or non-local
 [x] account balance - <br />
 [ ] new loan/fraud check/update with start <br />
 [ ] ask the ai agent how it did at the end of the conversation, was it efficient? successful? insert a search attribute to document that before return <br />
 - Insight into the agent’s performance <br />
 [ ] non-retry the api key error - "Invalid API Key provided: sk_test_**J..." and "AuthenticationError" <br />
 [ ] add visual feedback when workflow starting <br />
-[ ] figure out how to allow user to list agents at any time - like end conversation <br />
+[ ] enable user to list agents at any time - like end conversation - probably with a next step<br />
-
+ - with changing "'Next should only be "pick-new-goal" if all tools have been run (use the system prompt to figure that out).'" in [prompt_generators](./prompts/agent_prompt_generators.py).
 [ ] change initial goal selection prompt to list capabilities and prompt more nicely - not a bulleted list - see how that works
 [x] todo use env vars to connect to local or non-local cloud for activities for money scenarios
--- a/tools/data/employee_pto_data.json
+++ b/tools/data/employee_pto_data.json
@@ -11,6 +11,16 @@
        "email": "laine@awesome.com",
        "currentPTOHrs": 40,
        "hrsAddedPerMonth": 12
      },
      {
        "email": "steve.this.is.for.you@gmail.com",
        "currentPTOHrs": 4000,
        "hrsAddedPerMonth": 20
      },
      {
        "email": "your_email_here@yourcompany.com",
        "currentPTOHrs": 150,
        "hrsAddedPerMonth": 19
      } 
    ]
  }
--- a/tools/goal_registry.py
+++ b/tools/goal_registry.py
@@ -21,7 +21,7 @@ starter_prompt_generic = silly_prompt + "Welcome me, give me a description of wh
 goal_choose_agent_type = AgentGoal(
    id = "goal_choose_agent_type",
-    category_tag="system",
+    category_tag="agent_selection",
    agent_name="Choose Agent",
    agent_friendly_description="Choose the type of agent to assist you today.",
    tools=[
@@ -33,14 +33,14 @@ goal_choose_agent_type = AgentGoal(
        "1. ListAgents: List agents available to interact with. Do not ask for user confirmation for this tool. "
        "2. ChangeGoal: Change goal of agent "
        "After these tools are complete, change your goal to the new goal as chosen by the user. ",
-    starter_prompt=starter_prompt_generic + "Begin by listing all details of all agents as provided by the output of the first tool included in this goal. ",
+    starter_prompt=starter_prompt_generic + " Begin by listing all details of all agents as provided by the output of the first tool included in this goal. ",
    example_conversation_history="\n ".join(
        [
            "agent: Here are the currently available agents.",
            "user_confirmed_tool_run: <user clicks confirm on ListAgents tool>",
            "tool_result: { 'agent_name': 'Event Flight Finder', 'goal_id': 'goal_event_flight_invoice', 'agent_description': 'Helps users find interesting events and arrange travel to them' }",
-            "agent: The available agents are: 1. Event Flight Finder. \n Which agent would you like to speak to?",
+            "agent: The available agents are: 1. Event Flight Finder. \n Which agent would you like to speak to? (You can respond with name or number.)",
-            "user: 1",
+            "user: 1, Event Flight Finder",
            "user_confirmed_tool_run: <user clicks confirm on ChangeGoal tool>",
            "tool_result: { 'new_goal': 'goal_event_flight_invoice' }",
        ]
@@ -61,7 +61,6 @@ goal_pirate_treasure = AgentGoal(
    tools=[
        tool_registry.give_hint_tool,
        tool_registry.guess_location_tool,
        tool_registry.list_agents_tool, 
    ],
    description="The user wants to find a pirate treasure. "
        "Help the user gather args for these tools, in a loop, until treasure_found is True or the user requests to be done: "
@@ -98,7 +97,7 @@ goal_pirate_treasure = AgentGoal(
 goal_match_train_invoice = AgentGoal(
    id = "goal_match_train_invoice",
-    category_tag="travel",
+    category_tag="travel-trains",
    agent_name="UK Premier League Match Trip Booking",
    agent_friendly_description="Book a trip to a city in the UK around the dates of a premier league match.",
    tools=[
@@ -106,7 +105,6 @@ goal_match_train_invoice = AgentGoal(
        tool_registry.search_trains_tool,
        tool_registry.book_trains_tool,
        tool_registry.create_invoice_tool,
        tool_registry.list_agents_tool, #last tool must be list_agents to fasciliate changing back to picking an agent again at the end
    ],
    description="The user wants to book a trip to a city in the UK around the dates of a premier league match. "
    "Help the user find a premier league match to attend, search and book trains for that match and offers to invoice them for the cost of train tickets. "
@@ -146,14 +144,13 @@ goal_match_train_invoice = AgentGoal(
 goal_event_flight_invoice = AgentGoal(
    id = "goal_event_flight_invoice",
-    category_tag="travel",
+    category_tag="travel-flights",
    agent_name="Australia and New Zealand Event Flight Booking",
    agent_friendly_description="Book a trip to a city in Australia or New Zealand around the dates of events in that city.",    
    tools=[
        tool_registry.find_events_tool,
        tool_registry.search_flights_tool,
        tool_registry.create_invoice_tool,
        tool_registry.list_agents_tool, #last tool must be list_agents to fasciliate changing back to picking an agent again at the end
    ],
    description="Help the user gather args for these tools in order: "
    "1. FindEvents: Find an event to travel to "
@@ -193,7 +190,6 @@ goal_hr_schedule_pto = AgentGoal(
        tool_registry.current_pto_tool,
        tool_registry.future_pto_calc_tool,
        tool_registry.book_pto_tool,
        tool_registry.list_agents_tool, #last tool must be list_agents to fasciliate changing back to picking an agent again at the end
    ],
    description="The user wants to schedule paid time off (PTO) after today's date. To assist with that goal, help the user gather args for these tools in order: "
    "1. CurrentPTO: Tell the user how much PTO they currently have "
@@ -230,7 +226,6 @@ goal_hr_check_pto = AgentGoal(
    agent_friendly_description="Check your available PTO.",   
    tools=[
        tool_registry.current_pto_tool,
        tool_registry.list_agents_tool, #last tool must be list_agents to fasciliate changing back to picking an agent again at the end
    ],
    description="The user wants to check their paid time off (PTO) after today's date. To assist with that goal, help the user gather args for these tools in order: "
    "1. CurrentPTO: Tell the user how much PTO they currently have ",
@@ -252,11 +247,10 @@ goal_hr_check_pto = AgentGoal(
 goal_hr_check_paycheck_bank_integration_status = AgentGoal(
    id = "goal_hr_check_paycheck_bank_integration_status",
    category_tag="hr",
-    agent_name="Check paycheck bank integration status",
+    agent_name="Check paycheck deposit status",
-    agent_friendly_description="Check your integration between paycheck payer and your financial institution.",   
+    agent_friendly_description="Check your integration between your employer and your financial institution.",   
    tools=[
        tool_registry.paycheck_bank_integration_status_check,
        tool_registry.list_agents_tool, #last tool must be list_agents to fasciliate changing back to picking an agent again at the end
    ],
    description="The user wants to check their bank integration used to deposit their paycheck. To assist with that goal, help the user gather args for these tools in order: "
    "1. CheckPayBankStatus: Tell the user the status of their paycheck bank integration ",
@@ -283,7 +277,6 @@ goal_fin_check_account_balances = AgentGoal(
    tools=[
        tool_registry.financial_check_account_is_valid,
        tool_registry.financial_get_account_balances,
        tool_registry.list_agents_tool, #last tool must be list_agents to fasciliate changing back to picking an agent again at the end
    ],
    description="The user wants to check their account balances at the bank or financial institution. To assist with that goal, help the user gather args for these tools in order: "
    "1. FinCheckAccountIsValid: validate the user's account is valid"
@@ -318,7 +311,6 @@ goal_fin_move_money = AgentGoal(
        tool_registry.financial_check_account_is_valid,
        tool_registry.financial_get_account_balances,
        tool_registry.financial_move_money,
        tool_registry.list_agents_tool, #last tool must be list_agents to fasciliate changing back to picking an agent again at the end
    ],
    description="The user wants to transfer money in their account at the bank or financial institution. To assist with that goal, help the user gather args for these tools in order: "
    "1. FinCheckAccountIsValid: validate the user's account is valid"
@@ -333,7 +325,7 @@ goal_fin_move_money = AgentGoal(
            "user_confirmed_tool_run: <user clicks confirm on FincheckAccountIsValid tool>",
            "tool_result: { 'status': account valid }",
            "agent: Great! Here are your account balances:",
-            "user_confirmed_tool_run: <user clicks confirm on FinCheckAccountBalance tool>", #todo is this needed?
+            "user_confirmed_tool_run: <user clicks confirm on FinCheckAccountBalance tool>", 
            "tool_result: { 'name': Matt Murdock, 'email': matt.murdock@nelsonmurdock.com, 'account_id': 11235, 'checking_balance': 875.40, 'savings_balance': 3200.15, 'bitcoin_balance': 0.1378, 'account_creation_date': 2014-03-10 }",
            "agent: Your account balances are as follows: \n "
                "Checking: $875.40. \n "
@@ -348,7 +340,6 @@ goal_fin_move_money = AgentGoal(
    ),
 )
 #todo add money movement, fraud check (update with start)
 #Add the goals to a list for more generic processing, like listing available agents
 goal_list: List[AgentGoal] = []
 goal_list.append(goal_choose_agent_type)
--- a/tools/list_agents.py
+++ b/tools/list_agents.py
@@ -10,6 +10,12 @@ def list_agents(args: dict) -> dict:
        goal_categories_start.strip().lower() # handle extra spaces or non-lowercase
        goal_categories = goal_categories_start.split(",")
    # if multi-goal-mode, add agent_selection as a goal (defaults to True)
    if "agent_selection" not in goal_categories :
        first_goal_value = os.getenv("AGENT_GOAL")        
        if first_goal_value is None or first_goal_value.lower() == "goal_choose_agent_type":
            goal_categories.append("agent_selection")
    # always show goals labeled as "system," like the goal chooser
    if "system" not in goal_categories:
        goal_categories.append("system")
--- a/tools/tool_registry.py
+++ b/tools/tool_registry.py
@@ -255,6 +255,7 @@ paycheck_bank_integration_status_check = ToolDefinition(
    ],
 )
 # ----- Financial use cases tools -----
 financial_check_account_is_valid = ToolDefinition(
    name="FinCheckAccountIsValid",
    description="Check if an account is valid by email address or account ID. "
--- a/workflows/agent_goal_workflow.py
+++ b/workflows/agent_goal_workflow.py
@@ -1,12 +1,11 @@
 from collections import deque
 from datetime import timedelta
 import os
 from typing import Dict, Any, Union, List, Optional, Deque, TypedDict
 from temporalio.common import RetryPolicy
 from temporalio import workflow
-from models.data_types import ConversationHistory, NextStep, ValidationInput
+from models.data_types import ConversationHistory, EnvLookupOutput, NextStep, ValidationInput, EnvLookupInput
 from models.tool_definitions import AgentGoal
 from workflows.workflow_helpers import LLM_ACTIVITY_START_TO_CLOSE_TIMEOUT, \
    LLM_ACTIVITY_SCHEDULE_TO_CLOSE_TIMEOUT
@@ -26,12 +25,6 @@ with workflow.unsafe.imports_passed_through():
 # Constants
 MAX_TURNS_BEFORE_CONTINUE = 250
 show_confirm_env = os.getenv("SHOW_CONFIRM")
 if show_confirm_env is not None and show_confirm_env.lower() == "false":
    SHOW_CONFIRM = False
 else:
    SHOW_CONFIRM = True
 #ToolData as part of the workflow is what's accessible to the UI - see LLMResponse.jsx for example
 class ToolData(TypedDict, total=False):
    next: NextStep
@@ -50,9 +43,11 @@ class AgentGoalWorkflow:
        self.conversation_summary: Optional[str] = None
        self.chat_ended: bool = False
        self.tool_data: Optional[ToolData] = None
-        self.confirm: bool = False
+        self.confirmed: bool = False # indicates that we have confirmation to proceed to run tool
        self.tool_results: List[Dict[str, Any]] = []
        self.goal: AgentGoal = {"tools": []}
        self.show_tool_args_confirmation: bool = True # set from env file in activity lookup_wf_env_settings
        self.multi_goal_mode: bool = False # set from env file in activity lookup_wf_env_settings
    # see ../api/main.py#temporal_client.start_workflow() for how the input parameters are set
    @workflow.run
@@ -63,6 +58,8 @@ class AgentGoalWorkflow:
        params = combined_input.tool_params
        self.goal = combined_input.agent_goal
        await self.lookup_wf_env_settings(combined_input)
        # add message from sample conversation provided in tools/goal_registry.py, if it exists
        if params and params.conversation_summary:
            self.add_message("conversation_summary", params.conversation_summary)
@@ -83,7 +80,7 @@ class AgentGoalWorkflow:
        while True:
            # wait indefinitely for input from signals - user_prompt, end_chat, or confirm as defined below
            await workflow.wait_condition(
-                lambda: bool(self.prompt_queue) or self.chat_ended or self.confirm
+                lambda: bool(self.prompt_queue) or self.chat_ended or self.confirmed
            )
            # handle chat should end. When chat ends, push conversation history to workflow results.
@@ -128,7 +125,12 @@ class AgentGoalWorkflow:
                        continue
                # If valid, proceed with generating the context and prompt
-                context_instructions = generate_genai_prompt(self.goal, self.conversation_history, self.tool_data)
+                context_instructions = generate_genai_prompt(
                    agent_goal=self.goal, 
                    conversation_history = self.conversation_history, 
                    multi_goal_mode=self.multi_goal_mode, 
                    raw_json=self.tool_data)
                prompt_input = ToolPromptInput(prompt=prompt, context_instructions=context_instructions)
                # connect to LLM and execute to get next steps
@@ -141,7 +143,8 @@ class AgentGoalWorkflow:
                        initial_interval=timedelta(seconds=5), backoff_coefficient=1
                    ),
                )
-                tool_data["force_confirm"] = SHOW_CONFIRM
+
                tool_data["force_confirm"] = self.show_tool_args_confirmation
                self.tool_data = tool_data
                # process the tool as dictated by the prompt response - what to do next, and with which tool
@@ -150,30 +153,39 @@ class AgentGoalWorkflow:
                workflow.logger.info(f"next_step: {next_step}, current tool is {current_tool}")
-                #if the next step is to confirm...
+                # make sure we're ready to run the tool & have everything we need
                if next_step == "confirm" and current_tool:
                    args = tool_data.get("args", {})
-                    #if we're missing arguments, go back to the top of the loop
+                    # if we're missing arguments, ask for them 
                    if await helpers.handle_missing_args(current_tool, args, tool_data, self.prompt_queue):
                        continue
                    #...otherwise, if we want to force the user to confirm, set that up
                    waiting_for_confirm = True
                    if SHOW_CONFIRM:
                        self.confirm = False
                        workflow.logger.info("Waiting for user confirm signal...")
                    else:
                        #theory - set self.confirm to true bc that's the signal, so we can get around the signal??
                        self.confirm = True
-                # else if the next step is to pick a new goal...
+                    # We have needed arguments, if we want to force the user to confirm, set that up                    
                    if self.show_tool_args_confirmation:
                        self.confirmed = False # set that we're not confirmed 
                        workflow.logger.info("Waiting for user confirm signal...")
                    # if we have all needed arguments (handled above) and not holding for a debugging confirm, proceed:
                    else:
                        self.confirmed = True
                # else if the next step is to pick a new goal, set the goal and tool to do it
                elif next_step == "pick-new-goal":
                    workflow.logger.info("All steps completed. Resetting goal.")
                    self.change_goal("goal_choose_agent_type")
                    next_step = tool_data["next"] = "confirm"
                    current_tool = tool_data["tool"] = "ListAgents"
                    waiting_for_confirm = True
                    self.confirmed = True
-                # else if the next step is to be done - this should only happen if the user requests it via "end conversation"
+                # else if the next step is to be done with the conversation such as if the user requests it via asking to "end conversation"
                elif next_step == "done":
                    self.add_message("agent", tool_data)
                    #here we could send conversation to AI for analysis
                    # end the workflow
                    return str(self.conversation_history)
@@ -198,10 +210,10 @@ class AgentGoalWorkflow:
    #Signal that comes from api/main.py via a post to /confirm
    @workflow.signal
-    async def confirm(self) -> None:
+    async def confirmed(self) -> None:
        """Signal handler for user confirmation of tool execution."""
        workflow.logger.info("Received user signal: confirmation")
-        self.confirm = True
+        self.confirmed = True
    #Signal that comes from api/main.py via a post to /end-chat
    @workflow.signal
@@ -210,6 +222,20 @@ class AgentGoalWorkflow:
        workflow.logger.info("signal received: end_chat")
        self.chat_ended = True
    #Signal that can be sent from Temporal Workflow UI to enable debugging confirm and override .env setting
    @workflow.signal
    async def enable_debugging_confirm(self) -> None:
        """Signal handler for enabling debugging confirm UI & associated logic.

        Turns on ``show_tool_args_confirmation``, the flag the run loop checks
        before waiting for a user confirm and copies into
        ``tool_data["force_confirm"]``.
        """
        workflow.logger.info("signal received: enable_debugging_confirm")
        # Set the flag the workflow actually reads. Assigning to
        # self.enable_debugging_confirm would only shadow this handler on the
        # instance and leave the confirm behaviour unchanged.
        self.show_tool_args_confirmation = True
    #Signal that can be sent from Temporal Workflow UI to disable debugging confirm and override .env setting
    @workflow.signal
    async def disable_debugging_confirm(self) -> None:
        """Signal handler for disabling debugging confirm UI & associated logic.

        Turns off ``show_tool_args_confirmation``, the flag the run loop checks
        before waiting for a user confirm and copies into
        ``tool_data["force_confirm"]``.
        """
        workflow.logger.info("signal received: disable_debugging_confirm")
        # Clear the flag the workflow actually reads. Assigning to
        # self.enable_debugging_confirm would create an attribute nothing
        # consults and leave the confirm behaviour unchanged.
        self.show_tool_args_confirmation = False
    @workflow.query
    def get_conversation_history(self) -> ConversationHistory:
        """Query handler to retrieve the full conversation history."""
@@ -249,12 +275,11 @@ class AgentGoalWorkflow:
        )
    def change_goal(self, goal: str) -> None:
-        '''goalsLocal = {
+        """ Change the goal (usually on request of the user).
            "goal_match_train_invoice": goal_match_train_invoice,
            "goal_event_flight_invoice": goal_event_flight_invoice,
            "goal_choose_agent_type": goal_choose_agent_type,
        }'''
        Args: 
            goal: goal to change to)
        """
        if goal is not None:
            for listed_goal in goal_list:
                if listed_goal.id == goal:
@@ -274,7 +299,7 @@ class AgentGoalWorkflow:
    # define if we're ready for tool execution
    def ready_for_tool_execution(self, waiting_for_confirm: bool, current_tool: Any) -> bool:
-        if self.confirm and waiting_for_confirm and current_tool and self.tool_data:
+        if self.confirmed and waiting_for_confirm and current_tool and self.tool_data:
            return True
        else:
            return False
@@ -287,11 +312,27 @@ class AgentGoalWorkflow:
         else:
             return True
    # look up env settings in an activity so they're part of history
    async def lookup_wf_env_settings(self, combined_input: CombinedInput)->None:
        """Fetch workflow settings from the environment through an activity.

        Doing the lookup in an activity makes the values part of workflow
        history. The results are stored on ``self.show_tool_args_confirmation``
        and ``self.multi_goal_mode``.

        Args:
            combined_input: not read by this method.
        """
        lookup_request = EnvLookupInput(
            show_confirm_env_var_name="SHOW_CONFIRM",
            show_confirm_default=True,
        )
        env_lookup_retry = RetryPolicy(
            initial_interval=timedelta(seconds=5),
            backoff_coefficient=1,
        )
        settings: EnvLookupOutput = await workflow.execute_activity(
            ToolActivities.get_wf_env_vars,
            lookup_request,
            start_to_close_timeout=LLM_ACTIVITY_START_TO_CLOSE_TIMEOUT,
            retry_policy=env_lookup_retry,
        )
        self.show_tool_args_confirmation = settings.show_confirm
        self.multi_goal_mode = settings.multi_goal_mode
    # execute the tool - return False if we're not waiting for confirm anymore (always the case if it works successfully)
    # 
    async def execute_tool(self, current_tool: str)->bool:
        workflow.logger.info(f"workflow step: user has confirmed, executing the tool {current_tool}")
-        self.confirm = False
+        self.confirmed = False
        waiting_for_confirm = False
        confirmed_tool_data = self.tool_data.copy()
        confirmed_tool_data["next"] = "user_confirmed_tool_run"
@@ -317,5 +358,13 @@ class AgentGoalWorkflow:
                self.change_goal("goal_choose_agent_type")
        return waiting_for_confirm
-
+    # debugging helper - drop this in various places in the workflow to get status
    # also don't forget you can look at the workflow itself and do queries if you want
    def print_useful_workflow_vars(self, status_or_step:str) -> None:
        """Print a snapshot of confirmation-related workflow state for debugging.

        Never raises on missing tool data: absent values are printed as None.

        Args:
            status_or_step: label printed as the header of the snapshot.
        """
        # tool_data may be empty/None or lack keys (ready_for_tool_execution
        # tests its truthiness); a debugging helper must not fail the workflow.
        tool_data = self.tool_data or {}
        # waiting_for_confirm is handled as a local/parameter elsewhere in this
        # class, so read the attribute defensively.
        waiting_for_confirm = getattr(self, "waiting_for_confirm", None)
        print(f"***{status_or_step}:***")
        print(f"force confirm? {tool_data.get('force_confirm')}")
        print(f"next step: {tool_data.get('next')}")
        print(f"current_tool: {tool_data.get('tool')}")
        print(f"self.confirmed: {self.confirmed}")
        print(f"waiting_for_confirm (about to be set to true): {waiting_for_confirm}")