From e35181b5adc2afdf01089fe75b14d2cd4fad83b4 Mon Sep 17 00:00:00 2001
From: Steve Androulakis <steve.androulakis@gmail.com>
Date: Thu, 29 May 2025 12:56:58 -0700
Subject: [PATCH] Temporal tests (#40)

* temporal tests

* codex setup env script to readme
---
 AGENTS.md                                     | 175 ++++++
 README.md                                     |  35 +-
 TESTING.md                                    | 163 ++++++
 pyproject.toml                                |   3 +-
 tests/README.md                               | 350 ++++++++++++
 tests/conftest.py                             |  63 +-
 tests/test_agent_goal_workflow.py             | 540 ++++++++++++++++++
 tests/test_tool_activities.py                 | 466 +++++++++++++++
 .../workflowtests/agent_goal_workflow_test.py |  44 +-
 9 files changed, 1832 insertions(+), 7 deletions(-)
 create mode 100644 AGENTS.md
 create mode 100644 TESTING.md
 create mode 100644 tests/README.md
 create mode 100644 tests/test_agent_goal_workflow.py
 create mode 100644 tests/test_tool_activities.py

diff --git a/AGENTS.md b/AGENTS.md
new file mode 100644
index 0000000..9421e1f
--- /dev/null
+++ b/AGENTS.md
@@ -0,0 +1,175 @@
+# Temporal AI Agent Contribution Guide
+
+## Repository Layout
+- `workflows/` - Temporal workflows including the main AgentGoalWorkflow for multi-turn AI conversations
+- `activities/` - Temporal activities for tool execution and LLM interactions  
+- `tools/` - AI agent tools organized by category (finance, HR, ecommerce, travel, etc.)
+- `models/` - Data types and tool definitions used throughout the system
+- `prompts/` - Agent prompt generators and templates
+- `api/` - FastAPI server that exposes REST endpoints to interact with workflows
+- `frontend/` - React-based web UI for chatting with the AI agent
+- `tests/` - Comprehensive test suite for workflows and activities using Temporal's testing framework
+- `enterprise/` - .NET worker implementation for enterprise activities (train booking)
+- `scripts/` - Utility scripts for running workers and testing tools
+
+## Running the Application
+
+### Quick Start with Docker
+```bash
+# Start all services with development hot-reload
+docker compose up -d
+
+# Quick rebuild without infrastructure
+docker compose up -d --no-deps --build api worker frontend
+```
+
+Default URLs:
+- Temporal UI: http://localhost:8080
+- API: http://localhost:8000  
+- Frontend: http://localhost:5173
+
+### Local Development Setup
+
+1. **Prerequisites:**
+   ```bash
+   # Install Poetry for Python dependency management
+   curl -sSL https://install.python-poetry.org | python3 -
+   
+   # Start Temporal server (Mac)
+   brew install temporal
+   temporal server start-dev
+   ```
+
+2. **Backend (Python):**
+   ```bash
+   # Quick setup using Makefile
+   make setup              # Creates venv and installs dependencies
+   make run-worker         # Starts the Temporal worker
+   make run-api            # Starts the API server
+   
+   # Or manually:
+   poetry install
+   poetry run python scripts/run_worker.py    # In one terminal
+   poetry run uvicorn api.main:app --reload   # In another terminal
+   ```
+
+3. **Frontend (React):**
+   ```bash
+   make run-frontend       # Using Makefile
+   
+   # Or manually:
+   cd frontend
+   npm install
+   npx vite
+   ```
+
+4. **Enterprise .NET Worker (optional):**
+   ```bash
+   make run-enterprise     # Using Makefile
+   
+   # Or manually:
+   cd enterprise
+   dotnet build
+   dotnet run
+   ```
+
+### Environment Configuration
+Copy `.env.example` to `.env` and configure:
+```bash
+# Required: LLM Configuration
+LLM_MODEL=openai/gpt-4o          # or anthropic/claude-3-sonnet, etc.
+LLM_KEY=your-api-key-here
+
+# Optional: Agent Goals and Categories  
+AGENT_GOAL=goal_choose_agent_type
+GOAL_CATEGORIES=hr,travel-flights,travel-trains,fin
+
+# Optional: Tool-specific APIs
+STRIPE_API_KEY=sk_test_...       # For invoice creation
+FOOTBALL_DATA_API_KEY=...        # For real football fixtures
+```
+
+## Testing
+
+The project includes comprehensive tests using Temporal's testing framework:
+
+```bash
+# Install test dependencies
+poetry install --with dev
+
+# Run all tests
+poetry run pytest
+
+# Run with time-skipping for faster execution  
+poetry run pytest --workflow-environment=time-skipping
+
+# Run specific test categories
+poetry run pytest tests/test_tool_activities.py -v     # Activity tests
+poetry run pytest tests/test_agent_goal_workflow.py -v # Workflow tests
+
+# Run with coverage
+poetry run pytest --cov=workflows --cov=activities
+```
+
+**Test Coverage:**
+- ✅ **Workflow Tests**: AgentGoalWorkflow signals, queries, state management
+- ✅ **Activity Tests**: ToolActivities, LLM integration (mocked), environment configuration  
+- ✅ **Integration Tests**: End-to-end workflow and activity execution
+
+**Documentation:**
+- **Quick Start**: [TESTING.md](TESTING.md) - Simple commands to run tests
+- **Comprehensive Guide**: [tests/README.md](tests/README.md) - Detailed testing patterns and best practices
+
+## Linting and Code Quality
+
+```bash
+# Using Poetry tasks
+poetry run poe format    # Format code with black and isort
+poetry run poe lint      # Check code style and types
+poetry run poe test      # Run test suite
+
+# Manual commands  
+poetry run black .
+poetry run isort .
+poetry run mypy --check-untyped-defs --namespace-packages .
+```
+
+## Agent Customization
+
+### Adding New Tools
+1. Create tool implementation in `tools/` directory
+2. Add tool function mapping in `tools/__init__.py`  
+3. Register tool definition in `tools/tool_registry.py`
+4. Associate with goals in `tools/goal_registry.py`
+
+### Configuring Goals
+The agent supports multiple goal categories:
+- **Financial**: Money transfers, loan applications (`fin/`)
+- **HR**: PTO booking, payroll status (`hr/`)  
+- **Travel**: Flight/train booking, event finding
+- **Ecommerce**: Order tracking, package management (`ecommerce/`)
+
+See [adding-goals-and-tools.md](adding-goals-and-tools.md) for detailed customization guide.
+
+## Architecture
+
+This system implements "Agentic AI" with these key components:
+1. **Goals** - High-level objectives accomplished through tool sequences
+2. **Agent Loops** - LLM execution → tool calls → human input → repeat until goal completion
+3. **Tool Approval** - Human confirmation for sensitive operations
+4. **Conversation Management** - LLM-powered input validation and history summarization
+5. **Durability** - Temporal workflows ensure reliable execution across failures
+
+For detailed architecture information, see [architecture.md](architecture.md).
+
+## Commit Messages and Pull Requests
+- Use clear commit messages describing the change purpose
+- Reference specific files and line numbers when relevant (e.g., `workflows/agent_goal_workflow.py:125`)
+- Open PRs describing **what changed** and **why**
+- Ensure tests pass before submitting: `poetry run pytest --workflow-environment=time-skipping`
+
+## Additional Resources
+- **Setup Guide**: [setup.md](setup.md) - Detailed configuration instructions
+- **Architecture Decisions**: [architecture-decisions.md](architecture-decisions.md) - Why Temporal for AI agents
+- **Demo Video**: [5-minute YouTube overview](https://www.youtube.com/watch?v=GEXllEH2XiQ)
+- **Multi-Agent Demo**: [Advanced multi-agent execution](https://www.youtube.com/watch?v=8Dc_0dC14yY)
\ No newline at end of file
diff --git a/README.md b/README.md
index 3b15b83..d6f7d17 100644
--- a/README.md
+++ b/README.md
@@ -46,11 +46,44 @@ See [the guide to adding goals and tools](./adding-goals-and-tools.md).
 ## Architecture
 See [the architecture guide](./architecture.md).
 
+## Testing
+
+The project includes comprehensive tests for workflows and activities using Temporal's testing framework:
+
+```bash
+# Install dependencies including test dependencies
+poetry install --with dev
+
+# Run all tests
+poetry run pytest
+
+# Run with time-skipping for faster execution
+poetry run pytest --workflow-environment=time-skipping
+```
+
+**Test Coverage:**
+- ✅ **Workflow Tests**: AgentGoalWorkflow signals, queries, state management
+- ✅ **Activity Tests**: ToolActivities, LLM integration (mocked), environment configuration
+- ✅ **Integration Tests**: End-to-end workflow and activity execution
+
+**Documentation:**
+- **Quick Start**: [TESTING.md](TESTING.md) - Simple commands to run tests
+- **Comprehensive Guide**: [tests/README.md](tests/README.md) - Detailed testing documentation, patterns, and best practices
+
+## Development
+
+Install dependencies:
+```bash
+poetry install
+```
+
+Start the Temporal Server and API server, see [setup](setup.md)
+
 ## Productionalization & Adding Features
 - In a prod setting, I would need to ensure that payload data is stored separately (e.g. in S3 or a noSQL db - the claim-check pattern), or otherwise 'garbage collected'. Without these techniques, long conversations will fill up the workflow's conversation history, and start to breach Temporal event history payload limits.
 - A single worker can easily support many agent workflows (chats) running at the same time. Currently the workflow ID is the same each time, so it will only run one agent at a time. To run multiple agents, you can use a different workflow ID each time (e.g. by using a UUID or timestamp).
 - Perhaps the UI should show when the LLM response is being retried (i.e. activity retry attempt because the LLM provided bad output)
-- Tests would be nice! [See tests](./tests/).
+- The project now includes comprehensive tests for workflows and activities! [See testing guide](TESTING.md).
 
 
 See [the todo](./todo.md) for more details.
diff --git a/TESTING.md b/TESTING.md
new file mode 100644
index 0000000..734435b
--- /dev/null
+++ b/TESTING.md
@@ -0,0 +1,163 @@
+# Testing the Temporal AI Agent
+
+This guide provides instructions for running the comprehensive test suite for the Temporal AI Agent project.
+
+## Quick Start
+
+1. **Install dependencies**:
+   ```bash
+   poetry install --with dev
+   ```
+
+2. **Run all tests**:
+   ```bash
+   poetry run pytest
+   ```
+
+3. **Run with time-skipping for faster execution**:
+   ```bash
+   poetry run pytest --workflow-environment=time-skipping
+   ```
+
+## Test Categories
+
+### Unit Tests
+- **Activity Tests**: `tests/test_tool_activities.py`
+  - LLM integration (mocked)
+  - Environment configuration
+  - JSON processing
+  - Dynamic tool execution
+
+### Integration Tests  
+- **Workflow Tests**: `tests/test_agent_goal_workflow.py`
+  - Full workflow execution
+  - Signal and query handling
+  - State management
+  - Error scenarios
+
+## Running Specific Tests
+
+```bash
+# Run only activity tests
+poetry run pytest tests/test_tool_activities.py -v
+
+# Run only workflow tests  
+poetry run pytest tests/test_agent_goal_workflow.py -v
+
+# Run a specific test
+poetry run pytest tests/test_tool_activities.py::TestToolActivities::test_sanitize_json_response -v
+
+# Run tests matching a pattern
+poetry run pytest -k "validation" -v
+```
+
+## Test Environment Options
+
+### Local Environment (Default)
+```bash
+poetry run pytest --workflow-environment=local
+```
+
+### Time-Skipping Environment (Recommended for CI)
+```bash
+poetry run pytest --workflow-environment=time-skipping
+```
+
+### External Temporal Server
+```bash
+poetry run pytest --workflow-environment=localhost:7233
+```
+
+## Environment Variables
+
+Tests can be configured with these environment variables:
+
+- `LLM_MODEL`: Model for LLM testing (default: "openai/gpt-4")
+- `LLM_KEY`: API key for LLM service (mocked in tests)
+- `LLM_BASE_URL`: Custom LLM endpoint (optional)
+
+## Test Coverage
+
+The test suite covers:
+
+✅ **Workflows**
+- AgentGoalWorkflow initialization and execution
+- Signal handling (user_prompt, confirm, end_chat)
+- Query methods (conversation history, agent goal, tool data)
+- State management and conversation flow
+- Validation and error handling
+
+✅ **Activities**  
+- ToolActivities class methods
+- LLM integration (mocked)
+- Environment variable handling
+- JSON response processing
+- Dynamic tool activity execution
+
+✅ **Integration**
+- End-to-end workflow execution
+- Activity registration in workers
+- Temporal client interactions
+
+## Test Output
+
+Successful test run example:
+```
+============================== test session starts ==============================
+platform darwin -- Python 3.11.3, pytest-8.3.5, pluggy-1.5.0
+rootdir: /Users/steveandroulakis/Documents/Code/agentic/temporal-demo/temporal-ai-agent
+configfile: pyproject.toml
+plugins: anyio-4.5.2, asyncio-0.26.0
+collected 21 items
+
+tests/test_tool_activities.py::TestToolActivities::test_sanitize_json_response PASSED
+tests/test_tool_activities.py::TestToolActivities::test_parse_json_response_success PASSED
+tests/test_tool_activities.py::TestToolActivities::test_get_wf_env_vars_default_values PASSED
+...
+
+============================== 21 passed in 12.5s ==============================
+```
+
+## Troubleshooting
+
+### Common Issues
+
+1. **Module not found errors**: Run `poetry install --with dev`
+2. **Async warnings**: These are expected with pytest-asyncio and can be ignored  
+3. **Test timeouts**: Use `--workflow-environment=time-skipping` for faster execution
+4. **Import errors**: Check that you're running tests from the project root directory
+
+### Debugging Tests
+
+Enable verbose logging:
+```bash
+poetry run pytest --log-cli-level=DEBUG -s
+```
+
+Run with coverage:
+```bash
+poetry run pytest --cov=workflows --cov=activities
+```
+
+## Continuous Integration
+
+For CI environments, use:
+```bash
+poetry run pytest --workflow-environment=time-skipping --tb=short
+```
+
+## Additional Resources
+
+- See `tests/README.md` for detailed testing documentation
+- Review `tests/conftest.py` for available test fixtures
+- Check individual test files for specific test scenarios
+
+## Test Architecture
+
+The tests use:
+- **Temporal Testing Framework**: For workflow and activity testing
+- **pytest-asyncio**: For async test support  
+- **unittest.mock**: For mocking external dependencies
+- **Test Fixtures**: For consistent test data and setup
+
+All external dependencies (LLM calls, file I/O) are mocked to ensure fast, reliable tests.
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index fc33578..a181759 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -56,4 +56,5 @@ asyncio_mode = "auto"
 log_cli = true
 log_cli_level = "INFO"
 log_cli_format = "%(asctime)s [%(levelname)8s] %(message)s (%(filename)s:%(lineno)s)"
-asyncio_default_fixture_loop_scope = "function"
\ No newline at end of file
+asyncio_default_fixture_loop_scope = "function"
+norecursedirs = ["vibe"]
\ No newline at end of file
diff --git a/tests/README.md b/tests/README.md
new file mode 100644
index 0000000..1a420d4
--- /dev/null
+++ b/tests/README.md
@@ -0,0 +1,350 @@
+# Temporal AI Agent - Testing Guide
+
+This directory contains comprehensive tests for the Temporal AI Agent project. The tests cover workflows, activities, and integration scenarios using Temporal's testing framework.
+
+## Test Structure
+
+```
+tests/
+├── README.md                      # This file - testing documentation
+├── conftest.py                    # Test configuration and fixtures
+├── test_agent_goal_workflow.py    # Workflow tests
+├── test_tool_activities.py        # Activity tests
+└── workflowtests/                 # Legacy workflow tests
+    └── agent_goal_workflow_test.py
+```
+
+## Test Types
+
+### 1. Workflow Tests (`test_agent_goal_workflow.py`)
+
+Tests the main `AgentGoalWorkflow` class covering:
+
+- **Workflow Initialization**: Basic workflow startup and state management
+- **Signal Handling**: Testing user_prompt, confirm, end_chat signals
+- **Query Methods**: Testing all workflow query endpoints
+- **State Management**: Conversation history, goal changes, tool data
+- **Validation Flow**: Prompt validation and error handling
+- **Tool Execution Flow**: Confirmation and tool execution cycles
+
+### 2. Activity Tests (`test_tool_activities.py`)
+
+Tests the `ToolActivities` class and `dynamic_tool_activity` function:
+
+- **LLM Integration**: Testing agent_toolPlanner with mocked LLM responses
+- **Validation Logic**: Testing agent_validatePrompt with various scenarios
+- **Environment Configuration**: Testing get_wf_env_vars with different env setups
+- **JSON Processing**: Testing response parsing and sanitization
+- **Dynamic Tool Execution**: Testing the dynamic activity dispatcher
+- **Integration**: End-to-end activity execution in Temporal workers
+
+### 3. Configuration Tests (`conftest.py`)
+
+Provides shared test fixtures and configuration:
+
+- **Temporal Environment**: Local and time-skipping test environments
+- **Sample Data**: Pre-configured agent goals, conversation history, inputs
+- **Test Client**: Configured Temporal client for testing
+
+## Running Tests
+
+### Prerequisites
+
+Ensure you have the required dependencies installed:
+
+```bash
+poetry install --with dev
+```
+
+### Basic Test Execution
+
+Run all tests:
+```bash
+poetry run pytest
+```
+
+Run specific test files:
+```bash
+# Workflow tests only
+poetry run pytest tests/test_agent_goal_workflow.py
+
+# Activity tests only
+poetry run pytest tests/test_tool_activities.py
+
+# Legacy tests
+poetry run pytest tests/workflowtests/
+```
+
+Run with verbose output:
+```bash
+poetry run pytest -v
+```
+
+### Test Environment Options
+
+The tests support different Temporal environments via the `--workflow-environment` flag:
+
+#### Local Environment (Default)
+Uses a local Temporal test server:
+```bash
+poetry run pytest --workflow-environment=local
+```
+
+#### Time-Skipping Environment
+Uses Temporal's time-skipping test environment for faster execution:
+```bash
+poetry run pytest --workflow-environment=time-skipping
+```
+
+#### External Server
+Connect to an existing Temporal server:
+```bash
+poetry run pytest --workflow-environment=localhost:7233
+```
+
+#### Setup Script for AI Agent environments such as OpenAI Codex
+```bash
+export SHELL=/bin/bash
+curl -sSL https://install.python-poetry.org | python3 -
+export PATH="$HOME/.local/bin:$PATH"
+ls
+poetry install --with dev
+cd frontend
+npm install
+cd ..
+
+# Pre-download the temporal test server binary
+poetry run python3 -c "
+import asyncio
+import sys
+from temporalio.testing import WorkflowEnvironment
+
+async def predownload():
+    try:
+        print('Starting test server download...')
+        env = await WorkflowEnvironment.start_time_skipping()
+        print('Test server downloaded and started successfully')
+        await env.shutdown()
+        print('Test server shut down successfully')
+    except Exception as e:
+        print(f'Error during download: {e}')
+        sys.exit(1)
+
+asyncio.run(predownload())
+"
+```
+
+### Filtering Tests
+
+Run tests by pattern:
+```bash
+# Run only validation tests
+poetry run pytest -k "validation"
+
+# Run only workflow tests
+poetry run pytest -k "workflow"
+
+# Run only activity tests
+poetry run pytest -k "activity"
+```
+
+Run tests by marker (if you add custom markers):
+```bash
+# Run only integration tests
+poetry run pytest -m integration
+
+# Skip slow tests
+poetry run pytest -m "not slow"
+```
+
+## Test Configuration
+
+### Test Discovery
+
+The `vibe/` directory is excluded from test collection to avoid conflicts with sample tests. This is configured in `pyproject.toml`:
+
+```toml
+[tool.pytest.ini_options]
+norecursedirs = ["vibe"]
+```
+
+### Environment Variables
+
+Tests respect the following environment variables:
+
+- `LLM_MODEL`: Model to use for LLM testing (defaults to "openai/gpt-4")
+- `LLM_KEY`: API key for LLM service
+- `LLM_BASE_URL`: Custom base URL for LLM service
+- `SHOW_CONFIRM`: Whether to show confirmation dialogs
+- `AGENT_GOAL`: Default agent goal setting
+
+### Mocking Strategy
+
+The tests use extensive mocking to avoid external dependencies:
+
+- **LLM Calls**: Mocked using `unittest.mock` to avoid actual API calls
+- **Tool Handlers**: Mocked to test workflow logic without tool execution
+- **Environment Variables**: Patched for consistent test environments
+
+## Writing New Tests
+
+### Test Naming Convention
+
+- Test files: `test_<module_name>.py`
+- Test classes: `Test<ClassName>`
+- Test methods: `test_<functionality>_<scenario>`
+
+Example:
+```python
+class TestAgentGoalWorkflow:
+    async def test_user_prompt_signal_valid_input(self, client, sample_combined_input):
+        # Test implementation
+        pass
+```
+
+### Using Fixtures
+
+Leverage the provided fixtures for consistent test data:
+
+```python
+async def test_my_workflow(self, client, sample_agent_goal, sample_conversation_history):
+    # client: Temporal test client
+    # sample_agent_goal: Pre-configured AgentGoal
+    # sample_conversation_history: Sample conversation data
+    pass
+```
+
+### Mocking External Dependencies
+
+Always mock external services:
+
+```python
+@patch('activities.tool_activities.completion')
+async def test_llm_integration(self, mock_completion):
+    mock_completion.return_value.choices[0].message.content = '{"test": "response"}'
+    # Test implementation
+```
+
+### Testing Workflow Signals and Queries
+
+```python
+async def test_workflow_signal(self, client, sample_combined_input):
+    # Start workflow
+    handle = await client.start_workflow(
+        AgentGoalWorkflow.run,
+        sample_combined_input,
+        id=str(uuid.uuid4()),
+        task_queue=task_queue_name,
+    )
+    
+    # Send signal
+    await handle.signal(AgentGoalWorkflow.user_prompt, "test message")
+    
+    # Query state
+    conversation = await handle.query(AgentGoalWorkflow.get_conversation_history)
+    
+    # End workflow
+    await handle.signal(AgentGoalWorkflow.end_chat)
+    result = await handle.result()
+```
+
+## Test Data and Fixtures
+
+### Sample Agent Goal
+
+The `sample_agent_goal` fixture provides a basic agent goal with:
+- Goal ID: "test_goal"
+- One test tool with a required string argument
+- Suitable for most workflow testing scenarios
+
+### Sample Conversation History
+
+The `sample_conversation_history` fixture provides:
+- Basic user and agent message exchange
+- Proper message format for testing
+
+### Sample Combined Input
+
+The `sample_combined_input` fixture provides:
+- Complete workflow input with agent goal and tool params
+- Conversation summary and prompt queue
+- Ready for workflow execution
+
+## Debugging Tests
+
+### Verbose Logging
+
+Enable detailed logging:
+```bash
+poetry run pytest --log-cli-level=DEBUG -s
+```
+
+### Temporal Web UI
+
+When using local environment, access Temporal Web UI at http://localhost:8233 to inspect workflow executions during tests.
+
+### Test Isolation
+
+Each test uses unique task queue names to prevent interference:
+```python
+task_queue_name = str(uuid.uuid4())
+```
+
+## Continuous Integration
+
+### GitHub Actions Example
+
+```yaml
+name: Test
+on: [push, pull_request]
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions/setup-python@v4
+        with:
+          python-version: '3.10'
+      - run: pip install poetry
+      - run: poetry install --with dev
+      - run: poetry run pytest --workflow-environment=time-skipping
+```
+
+### Test Coverage
+
+Generate coverage reports:
+```bash
+poetry add --group dev pytest-cov
+poetry run pytest --cov=workflows --cov=activities --cov-report=html
+```
+
+## Best Practices
+
+1. **Mock External Dependencies**: Always mock LLM calls, file I/O, and network requests
+2. **Use Time-Skipping**: For CI/CD, prefer time-skipping environment for speed
+3. **Unique Identifiers**: Use UUIDs for workflow IDs and task queues
+4. **Clean Shutdown**: Always end workflows properly in tests
+5. **Descriptive Names**: Use clear, descriptive test names
+6. **Test Edge Cases**: Include error scenarios and validation failures
+7. **Keep Tests Fast**: Use mocks to avoid slow external calls
+8. **Isolate Tests**: Ensure tests don't depend on each other
+
+## Troubleshooting
+
+### Common Issues
+
+1. **Workflow Timeout**: Increase timeouts or use time-skipping environment
+2. **Mock Not Working**: Check patch decorators and import paths
+3. **Test Hanging**: Ensure workflows are properly ended with signals
+4. **Environment Issues**: Check environment variable settings
+
+### Getting Help
+
+- Check Temporal Python SDK documentation
+- Review existing test patterns in the codebase
+- Use `poetry run pytest --collect-only` to verify test discovery
+- Run with `-v` flag for detailed output
+
+## Legacy Tests
+
+The `workflowtests/` directory contains legacy tests. New tests should be added to the main `tests/` directory following the patterns established in this guide.
\ No newline at end of file
diff --git a/tests/conftest.py b/tests/conftest.py
index 95294fb..b7220ea 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -41,7 +41,12 @@ def event_loop():
 async def env(request) -> AsyncGenerator[WorkflowEnvironment, None]:
     env_type = request.config.getoption("--workflow-environment")
     if env_type == "local":
-        env = await WorkflowEnvironment.start_local()
+        env = await WorkflowEnvironment.start_local(
+            dev_server_extra_args=[
+                "--dynamic-config-value",
+                "frontend.enableExecuteMultiOperation=true",
+            ]
+        )
     elif env_type == "time-skipping":
         env = await WorkflowEnvironment.start_time_skipping()
     else:
@@ -53,3 +58,59 @@ async def env(request) -> AsyncGenerator[WorkflowEnvironment, None]:
 @pytest_asyncio.fixture
 async def client(env: WorkflowEnvironment) -> Client:
     return env.client
+
+
+@pytest.fixture
+def sample_agent_goal():
+    """Sample agent goal for testing."""
+    from models.tool_definitions import AgentGoal, ToolDefinition, ToolArgument
+    
+    return AgentGoal(
+        id="test_goal",
+        category_tag="test",
+        agent_name="TestAgent",
+        agent_friendly_description="A test agent for testing purposes",
+        description="Test goal for agent testing",
+        tools=[
+            ToolDefinition(
+                name="TestTool",
+                description="A test tool for testing purposes",
+                arguments=[
+                    ToolArgument(
+                        name="test_arg",
+                        type="string",
+                        description="A test argument"
+                    )
+                ]
+            )
+        ]
+    )
+
+
+@pytest.fixture
+def sample_conversation_history():
+    """Sample conversation history for testing."""
+    return {
+        "messages": [
+            {"actor": "user", "response": "Hello, I need help with testing"},
+            {"actor": "agent", "response": "I can help you with that"}
+        ]
+    }
+
+
+@pytest.fixture
+def sample_combined_input(sample_agent_goal):
+    """Sample combined input for workflow testing."""
+    from models.data_types import CombinedInput, AgentGoalWorkflowParams
+    
+    from collections import deque
+    
+    tool_params = AgentGoalWorkflowParams(
+        conversation_summary="Test conversation summary",
+        prompt_queue=deque()  # Start with empty queue for most tests
+    )
+    
+    return CombinedInput(
+        agent_goal=sample_agent_goal,
+        tool_params=tool_params
+    )
diff --git a/tests/test_agent_goal_workflow.py b/tests/test_agent_goal_workflow.py
new file mode 100644
index 0000000..b19f0f9
--- /dev/null
+++ b/tests/test_agent_goal_workflow.py
@@ -0,0 +1,540 @@
+import uuid
+from unittest.mock import patch, MagicMock
+import pytest
+from temporalio import activity
+from temporalio.client import Client
+from temporalio.worker import Worker
+from temporalio.testing import WorkflowEnvironment
+
+from workflows.agent_goal_workflow import AgentGoalWorkflow
+from activities.tool_activities import ToolActivities
+from models.data_types import (
+    CombinedInput,
+    AgentGoalWorkflowParams,
+    ConversationHistory,
+    ValidationResult,
+    ValidationInput,
+    EnvLookupOutput,
+    EnvLookupInput,
+    ToolPromptInput
+)
+
+
+class TestAgentGoalWorkflow:
+    """Test cases for AgentGoalWorkflow."""
+
+    async def test_workflow_initialization(self, client: Client, sample_combined_input: CombinedInput):
+        """Test workflow can be initialized and started."""
+        task_queue_name = str(uuid.uuid4())
+        
+        # Create mock activity functions with proper signatures
+        @activity.defn(name="get_wf_env_vars")
+        async def mock_get_wf_env_vars(input: EnvLookupInput) -> EnvLookupOutput:
+            return EnvLookupOutput(
+                show_confirm=True,
+                multi_goal_mode=True
+            )
+        
+        async with Worker(
+            client,
+            task_queue=task_queue_name,
+            workflows=[AgentGoalWorkflow],
+            activities=[mock_get_wf_env_vars],
+        ):
+            # Start workflow but don't wait for completion since it runs indefinitely
+            handle = await client.start_workflow(
+                AgentGoalWorkflow.run,
+                sample_combined_input,
+                id=str(uuid.uuid4()),
+                task_queue=task_queue_name,
+            )
+            
+            # Verify workflow is running
+            assert handle is not None
+            
+            # Query the workflow to check initial state
+            conversation_history = await handle.query(AgentGoalWorkflow.get_conversation_history)
+            assert isinstance(conversation_history, dict)
+            assert "messages" in conversation_history
+            
+            # Test goal query
+            agent_goal = await handle.query(AgentGoalWorkflow.get_agent_goal)
+            assert agent_goal == sample_combined_input.agent_goal
+            
+            # End the workflow
+            await handle.signal(AgentGoalWorkflow.end_chat)
+            result = await handle.result()
+            assert isinstance(result, str)
+
+    async def test_user_prompt_signal(self, client: Client, sample_combined_input: CombinedInput):
+        """Test user_prompt signal handling."""
+        task_queue_name = str(uuid.uuid4())
+        
+        # Create mock activity functions with proper signatures
+        @activity.defn(name="get_wf_env_vars")
+        async def mock_get_wf_env_vars(input: EnvLookupInput) -> EnvLookupOutput:
+            return EnvLookupOutput(
+                show_confirm=True,
+                multi_goal_mode=True
+            )
+            
+        @activity.defn(name="agent_validatePrompt")
+        async def mock_agent_validatePrompt(validation_input: ValidationInput) -> ValidationResult:
+            return ValidationResult(
+                validationResult=True,
+                validationFailedReason={}
+            )
+            
+        @activity.defn(name="agent_toolPlanner")
+        async def mock_agent_toolPlanner(input: ToolPromptInput) -> dict:
+            return {
+                "next": "done",
+                "response": "Test response from LLM"
+            }
+        
+        async with Worker(
+            client,
+            task_queue=task_queue_name,
+            workflows=[AgentGoalWorkflow],
+            activities=[
+                mock_get_wf_env_vars,
+                mock_agent_validatePrompt,
+                mock_agent_toolPlanner
+            ],
+        ):
+            handle = await client.start_workflow(
+                AgentGoalWorkflow.run,
+                sample_combined_input,
+                id=str(uuid.uuid4()),
+                task_queue=task_queue_name,
+            )
+            
+            # Send user prompt
+            await handle.signal(AgentGoalWorkflow.user_prompt, "Hello, this is a test message")
+            
+            # Wait for workflow to complete (it should end due to "done" next step)
+            result = await handle.result()
+            assert isinstance(result, str)
+            
+            # Verify the conversation includes our message
+            import json
+            try:
+                conversation_history = json.loads(result.replace("'", '"'))
+            except:
+                # Fallback to eval if json fails
+                conversation_history = eval(result)
+            messages = conversation_history["messages"]
+            
+            # Should have our user message and agent response
+            user_messages = [msg for msg in messages if msg["actor"] == "user"]
+            assert len(user_messages) > 0
+            assert any("Hello, this is a test message" in str(msg["response"]) for msg in user_messages)
+
+    async def test_confirm_signal(self, client: Client, sample_combined_input: CombinedInput):
+        """Test confirm signal handling for tool execution."""
+        task_queue_name = str(uuid.uuid4())
+        
+        # Create mock activity functions with proper signatures
+        @activity.defn(name="get_wf_env_vars")
+        async def mock_get_wf_env_vars(input: EnvLookupInput) -> EnvLookupOutput:
+            return EnvLookupOutput(
+                show_confirm=True,
+                multi_goal_mode=True
+            )
+            
+        @activity.defn(name="agent_validatePrompt")
+        async def mock_agent_validatePrompt(validation_input: ValidationInput) -> ValidationResult:
+            return ValidationResult(
+                validationResult=True,
+                validationFailedReason={}
+            )
+            
+        @activity.defn(name="agent_toolPlanner")
+        async def mock_agent_toolPlanner(input: ToolPromptInput) -> dict:
+            return {
+                "next": "confirm",
+                "tool": "TestTool",
+                "args": {"test_arg": "test_value"},
+                "response": "Ready to execute tool"
+            }
+            
+        @activity.defn(name="TestTool")
+        async def mock_test_tool(args: dict) -> dict:
+            return {"result": "Test tool executed successfully"}
+        
+        async with Worker(
+            client,
+            task_queue=task_queue_name,
+            workflows=[AgentGoalWorkflow],
+            activities=[
+                mock_get_wf_env_vars,
+                mock_agent_validatePrompt,
+                mock_agent_toolPlanner,
+                mock_test_tool
+            ],
+        ):
+            handle = await client.start_workflow(
+                AgentGoalWorkflow.run,
+                sample_combined_input,
+                id=str(uuid.uuid4()),
+                task_queue=task_queue_name,
+            )
+            
+            # Send user prompt that will require confirmation
+            await handle.signal(AgentGoalWorkflow.user_prompt, "Execute the test tool")
+            
+            # Query to check tool data is set
+            import asyncio
+            await asyncio.sleep(0.1)  # Give workflow time to process
+            
+            tool_data = await handle.query(AgentGoalWorkflow.get_latest_tool_data)
+            if tool_data:
+                assert tool_data.get("tool") == "TestTool"
+                assert tool_data.get("next") == "confirm"
+            
+            # Send confirmation and end chat
+            await handle.signal(AgentGoalWorkflow.confirm)
+            await handle.signal(AgentGoalWorkflow.end_chat)
+            
+            result = await handle.result()
+            assert isinstance(result, str)
+
+    async def test_validation_failure(self, client: Client, sample_combined_input: CombinedInput):
+        """Test workflow handles validation failures correctly."""
+        task_queue_name = str(uuid.uuid4())
+        
+        # Create mock activity functions with proper signatures
+        @activity.defn(name="get_wf_env_vars")
+        async def mock_get_wf_env_vars(input: EnvLookupInput) -> EnvLookupOutput:
+            return EnvLookupOutput(
+                show_confirm=True,
+                multi_goal_mode=True
+            )
+            
+        @activity.defn(name="agent_validatePrompt")
+        async def mock_agent_validatePrompt(validation_input: ValidationInput) -> ValidationResult:
+            return ValidationResult(
+                validationResult=False,
+                validationFailedReason={
+                    "next": "question",
+                    "response": "Your request doesn't make sense in this context"
+                }
+            )
+        
+        async with Worker(
+            client,
+            task_queue=task_queue_name,
+            workflows=[AgentGoalWorkflow],
+            activities=[
+                mock_get_wf_env_vars,
+                mock_agent_validatePrompt
+            ],
+        ):
+            handle = await client.start_workflow(
+                AgentGoalWorkflow.run,
+                sample_combined_input,
+                id=str(uuid.uuid4()),
+                task_queue=task_queue_name,
+            )
+            
+            # Send invalid prompt
+            await handle.signal(AgentGoalWorkflow.user_prompt, "Invalid nonsensical prompt")
+            
+            # Give workflow time to process the prompt
+            import asyncio
+            await asyncio.sleep(0.2)
+            
+            # End workflow to check conversation
+            await handle.signal(AgentGoalWorkflow.end_chat)
+            result = await handle.result()
+            
+            # Verify validation failure message was added
+            import json
+            try:
+                conversation_history = json.loads(result.replace("'", '"'))
+            except:
+                # Fallback to eval if json fails
+                conversation_history = eval(result)
+            messages = conversation_history["messages"]
+            
+            # Should have validation failure response
+            agent_messages = [msg for msg in messages if msg["actor"] == "agent"]
+            assert len(agent_messages) > 0
+            assert any("doesn't make sense" in str(msg["response"]) for msg in agent_messages)
+
+    async def test_conversation_summary_initialization(self, client: Client, sample_agent_goal):
+        """Test workflow initializes with conversation summary."""
+        task_queue_name = str(uuid.uuid4())
+        
+        # Create input with conversation summary  
+        from collections import deque
+        tool_params = AgentGoalWorkflowParams(
+            conversation_summary="Previous conversation summary",
+            prompt_queue=deque()
+        )
+        combined_input = CombinedInput(
+            agent_goal=sample_agent_goal,
+            tool_params=tool_params
+        )
+        
+        # Create mock activity functions with proper signatures
+        @activity.defn(name="get_wf_env_vars")
+        async def mock_get_wf_env_vars(input: EnvLookupInput) -> EnvLookupOutput:
+            return EnvLookupOutput(
+                show_confirm=True,
+                multi_goal_mode=True
+            )
+        
+        async with Worker(
+            client,
+            task_queue=task_queue_name,
+            workflows=[AgentGoalWorkflow],
+            activities=[mock_get_wf_env_vars],
+        ):
+            handle = await client.start_workflow(
+                AgentGoalWorkflow.run,
+                combined_input,
+                id=str(uuid.uuid4()),
+                task_queue=task_queue_name,
+            )
+            
+            # Give workflow time to initialize
+            import asyncio
+            await asyncio.sleep(0.1)
+            
+            # Query conversation summary
+            summary = await handle.query(AgentGoalWorkflow.get_summary_from_history)
+            assert summary == "Previous conversation summary"
+            
+            # Query conversation history - should include summary message
+            conversation_history = await handle.query(AgentGoalWorkflow.get_conversation_history)
+            messages = conversation_history["messages"]
+            
+            # Should have conversation_summary message
+            summary_messages = [msg for msg in messages if msg["actor"] == "conversation_summary"]
+            assert len(summary_messages) == 1
+            assert summary_messages[0]["response"] == "Previous conversation summary"
+            
+            # End workflow
+            await handle.signal(AgentGoalWorkflow.end_chat)
+            await handle.result()
+
+    async def test_workflow_queries(self, client: Client, sample_combined_input: CombinedInput):
+        """Test all workflow query methods."""
+        task_queue_name = str(uuid.uuid4())
+        
+        # Create mock activity functions with proper signatures
+        @activity.defn(name="get_wf_env_vars")
+        async def mock_get_wf_env_vars(input: EnvLookupInput) -> EnvLookupOutput:
+            return EnvLookupOutput(
+                show_confirm=True,
+                multi_goal_mode=True
+            )
+        
+        async with Worker(
+            client,
+            task_queue=task_queue_name,
+            workflows=[AgentGoalWorkflow],
+            activities=[mock_get_wf_env_vars],
+        ):
+            handle = await client.start_workflow(
+                AgentGoalWorkflow.run,
+                sample_combined_input,
+                id=str(uuid.uuid4()),
+                task_queue=task_queue_name,
+            )
+            
+            # Give workflow time to initialize
+            import asyncio
+            await asyncio.sleep(0.1)
+            
+            # Test get_conversation_history query
+            conversation_history = await handle.query(AgentGoalWorkflow.get_conversation_history)
+            assert isinstance(conversation_history, dict)
+            assert "messages" in conversation_history
+            
+            # Test get_agent_goal query
+            agent_goal = await handle.query(AgentGoalWorkflow.get_agent_goal)
+            assert agent_goal.id == sample_combined_input.agent_goal.id
+            
+            # Test get_summary_from_history query
+            summary = await handle.query(AgentGoalWorkflow.get_summary_from_history)
+            # Summary might be None if not set, so check for that
+            if sample_combined_input.tool_params.conversation_summary:
+                assert summary == sample_combined_input.tool_params.conversation_summary
+            else:
+                assert summary is None
+            
+            # Test get_latest_tool_data query (should be None initially)
+            tool_data = await handle.query(AgentGoalWorkflow.get_latest_tool_data)
+            assert tool_data is None
+            
+            # End workflow
+            await handle.signal(AgentGoalWorkflow.end_chat)
+            await handle.result()
+
+    async def test_enable_disable_debugging_confirm_signals(self, client: Client, sample_combined_input: CombinedInput):
+        """Test debugging confirm enable/disable signals."""
+        task_queue_name = str(uuid.uuid4())
+        
+        # Create mock activity functions with proper signatures
+        @activity.defn(name="get_wf_env_vars")
+        async def mock_get_wf_env_vars(input: EnvLookupInput) -> EnvLookupOutput:
+            return EnvLookupOutput(
+                show_confirm=True,
+                multi_goal_mode=True
+            )
+        
+        async with Worker(
+            client,
+            task_queue=task_queue_name,
+            workflows=[AgentGoalWorkflow],
+            activities=[mock_get_wf_env_vars],
+        ):
+            handle = await client.start_workflow(
+                AgentGoalWorkflow.run,
+                sample_combined_input,
+                id=str(uuid.uuid4()),
+                task_queue=task_queue_name,
+            )
+            
+            # Test enable debugging confirm signal
+            await handle.signal(AgentGoalWorkflow.enable_debugging_confirm)
+            
+            # Test disable debugging confirm signal  
+            await handle.signal(AgentGoalWorkflow.disable_debugging_confirm)
+            
+            # End workflow
+            await handle.signal(AgentGoalWorkflow.end_chat)
+            result = await handle.result()
+            assert isinstance(result, str)
+
+    async def test_workflow_with_empty_prompt_queue(self, client: Client, sample_agent_goal):
+        """Test workflow behavior with empty prompt queue."""
+        task_queue_name = str(uuid.uuid4())
+        
+        # Create input with empty prompt queue
+        from collections import deque
+        tool_params = AgentGoalWorkflowParams(
+            conversation_summary=None,
+            prompt_queue=deque()
+        )
+        combined_input = CombinedInput(
+            agent_goal=sample_agent_goal,
+            tool_params=tool_params
+        )
+        
+        # Create mock activity functions with proper signatures
+        @activity.defn(name="get_wf_env_vars")
+        async def mock_get_wf_env_vars(input: EnvLookupInput) -> EnvLookupOutput:
+            return EnvLookupOutput(
+                show_confirm=True,
+                multi_goal_mode=True
+            )
+        
+        async with Worker(
+            client,
+            task_queue=task_queue_name,
+            workflows=[AgentGoalWorkflow],
+            activities=[mock_get_wf_env_vars],
+        ):
+            handle = await client.start_workflow(
+                AgentGoalWorkflow.run,
+                combined_input,
+                id=str(uuid.uuid4()),
+                task_queue=task_queue_name,
+            )
+            
+            # Give workflow time to initialize
+            import asyncio
+            await asyncio.sleep(0.1)
+            
+            # Query initial state
+            conversation_history = await handle.query(AgentGoalWorkflow.get_conversation_history)
+            assert isinstance(conversation_history, dict)
+            assert "messages" in conversation_history
+            
+            # Should have no messages initially (empty prompt queue, no summary)
+            messages = conversation_history["messages"]
+            assert len(messages) == 0
+            
+            # End workflow
+            await handle.signal(AgentGoalWorkflow.end_chat)
+            result = await handle.result()
+            assert isinstance(result, str)
+
+    async def test_multiple_user_prompts(self, client: Client, sample_combined_input: CombinedInput):
+        """Test workflow handling multiple user prompts in sequence."""
+        task_queue_name = str(uuid.uuid4())
+        
+        # Create mock activity functions with proper signatures
+        @activity.defn(name="get_wf_env_vars")
+        async def mock_get_wf_env_vars(input: EnvLookupInput) -> EnvLookupOutput:
+            return EnvLookupOutput(
+                show_confirm=True,
+                multi_goal_mode=True
+            )
+            
+        @activity.defn(name="agent_validatePrompt")
+        async def mock_agent_validatePrompt(validation_input: ValidationInput) -> ValidationResult:
+            return ValidationResult(
+                validationResult=True,
+                validationFailedReason={}
+            )
+            
+        @activity.defn(name="agent_toolPlanner")
+        async def mock_agent_toolPlanner(input: ToolPromptInput) -> dict:
+            # Keep workflow running for multiple prompts
+            return {
+                "next": "question",
+                "response": f"Processed: {input.prompt}"
+            }
+        
+        async with Worker(
+            client,
+            task_queue=task_queue_name,
+            workflows=[AgentGoalWorkflow],
+            activities=[
+                mock_get_wf_env_vars,
+                mock_agent_validatePrompt,
+                mock_agent_toolPlanner
+            ],
+        ):
+            handle = await client.start_workflow(
+                AgentGoalWorkflow.run,
+                sample_combined_input,
+                id=str(uuid.uuid4()),
+                task_queue=task_queue_name,
+            )
+            
+            # Send multiple prompts
+            await handle.signal(AgentGoalWorkflow.user_prompt, "First message")
+            import asyncio
+            await asyncio.sleep(0.1)
+            
+            await handle.signal(AgentGoalWorkflow.user_prompt, "Second message") 
+            await asyncio.sleep(0.1)
+            
+            await handle.signal(AgentGoalWorkflow.user_prompt, "Third message")
+            await asyncio.sleep(0.1)
+            
+            # End workflow
+            await handle.signal(AgentGoalWorkflow.end_chat)
+            result = await handle.result()
+            assert isinstance(result, str)
+            
+            # Parse result and verify multiple messages
+            import json
+            try:
+                conversation_history = json.loads(result.replace("'", '"'))
+            except:
+                conversation_history = eval(result)
+            messages = conversation_history["messages"]
+            
+            # Should have at least one user message (timing dependent)
+            user_messages = [msg for msg in messages if msg["actor"] == "user"]
+            assert len(user_messages) >= 1
+            
+            # Verify at least the first message was processed
+            message_texts = [str(msg["response"]) for msg in user_messages]
+            assert any("First message" in text for text in message_texts)
\ No newline at end of file
diff --git a/tests/test_tool_activities.py b/tests/test_tool_activities.py
new file mode 100644
index 0000000..9a486da
--- /dev/null
+++ b/tests/test_tool_activities.py
@@ -0,0 +1,466 @@
+import os
+import uuid
+import json
+from unittest.mock import patch, MagicMock, AsyncMock
+import pytest
+from temporalio.client import Client
+from temporalio.worker import Worker
+from temporalio.testing import ActivityEnvironment
+
+from activities.tool_activities import ToolActivities, dynamic_tool_activity
+from models.data_types import (
+    ValidationInput,
+    ValidationResult,
+    ToolPromptInput,
+    EnvLookupInput,
+    EnvLookupOutput
+)
+
+
+class TestToolActivities:
+    """Test cases for ToolActivities."""
+
+    def setup_method(self):
+        """Set up test environment for each test."""
+        self.tool_activities = ToolActivities()
+
+    @pytest.mark.asyncio
+    async def test_agent_validatePrompt_valid_prompt(self, sample_agent_goal, sample_conversation_history):
+        """Test agent_validatePrompt with a valid prompt."""
+        validation_input = ValidationInput(
+            prompt="I need help with the test tool",
+            conversation_history=sample_conversation_history,
+            agent_goal=sample_agent_goal
+        )
+        
+        # Mock the agent_toolPlanner to return a valid response
+        mock_response = {
+            "validationResult": True,
+            "validationFailedReason": {}
+        }
+        
+        with patch.object(self.tool_activities, 'agent_toolPlanner', new_callable=AsyncMock) as mock_planner:
+            mock_planner.return_value = mock_response
+            
+            activity_env = ActivityEnvironment()
+            result = await activity_env.run(
+                self.tool_activities.agent_validatePrompt,
+                validation_input
+            )
+            
+            assert isinstance(result, ValidationResult)
+            assert result.validationResult is True
+            assert result.validationFailedReason == {}
+            
+            # Verify the mock was called with correct parameters
+            mock_planner.assert_called_once()
+
+    @pytest.mark.asyncio
+    async def test_agent_validatePrompt_invalid_prompt(self, sample_agent_goal, sample_conversation_history):
+        """Test agent_validatePrompt with an invalid prompt."""
+        validation_input = ValidationInput(
+            prompt="asdfghjkl nonsense",
+            conversation_history=sample_conversation_history,
+            agent_goal=sample_agent_goal
+        )
+        
+        # Mock the agent_toolPlanner to return an invalid response
+        mock_response = {
+            "validationResult": False,
+            "validationFailedReason": {
+                "next": "question",
+                "response": "Your request doesn't make sense in this context"
+            }
+        }
+        
+        with patch.object(self.tool_activities, 'agent_toolPlanner', new_callable=AsyncMock) as mock_planner:
+            mock_planner.return_value = mock_response
+            
+            activity_env = ActivityEnvironment()
+            result = await activity_env.run(
+                self.tool_activities.agent_validatePrompt,
+                validation_input
+            )
+            
+            assert isinstance(result, ValidationResult)
+            assert result.validationResult is False
+            assert "doesn't make sense" in str(result.validationFailedReason)
+
+    @pytest.mark.asyncio
+    async def test_agent_toolPlanner_success(self):
+        """Test agent_toolPlanner with successful LLM response."""
+        prompt_input = ToolPromptInput(
+            prompt="Test prompt",
+            context_instructions="Test context instructions"
+        )
+        
+        # Mock the completion function
+        mock_response = MagicMock()
+        mock_response.choices = [MagicMock()]
+        mock_response.choices[0].message.content = '{"next": "confirm", "tool": "TestTool", "response": "Test response"}'
+        
+        with patch('activities.tool_activities.completion') as mock_completion:
+            mock_completion.return_value = mock_response
+            
+            activity_env = ActivityEnvironment()
+            result = await activity_env.run(
+                self.tool_activities.agent_toolPlanner,
+                prompt_input
+            )
+            
+            assert isinstance(result, dict)
+            assert result["next"] == "confirm"
+            assert result["tool"] == "TestTool"
+            assert result["response"] == "Test response"
+            
+            # Verify completion was called with correct parameters
+            mock_completion.assert_called_once()
+            call_args = mock_completion.call_args[1]
+            assert call_args["model"] == self.tool_activities.llm_model
+            assert len(call_args["messages"]) == 2
+            assert call_args["messages"][0]["role"] == "system"
+            assert call_args["messages"][1]["role"] == "user"
+
+    @pytest.mark.asyncio
+    async def test_agent_toolPlanner_with_custom_base_url(self):
+        """Test agent_toolPlanner with custom base URL configuration."""
+        # Set up tool activities with custom base URL
+        with patch.dict(os.environ, {'LLM_BASE_URL': 'https://custom.endpoint.com'}):
+            tool_activities = ToolActivities()
+            
+            prompt_input = ToolPromptInput(
+                prompt="Test prompt",
+                context_instructions="Test context instructions"
+            )
+            
+            mock_response = MagicMock()
+            mock_response.choices = [MagicMock()]
+            mock_response.choices[0].message.content = '{"next": "done", "response": "Test"}'
+            
+            with patch('activities.tool_activities.completion') as mock_completion:
+                mock_completion.return_value = mock_response
+                
+                activity_env = ActivityEnvironment()
+                await activity_env.run(
+                    tool_activities.agent_toolPlanner,
+                    prompt_input
+                )
+                
+                # Verify base_url was included in the call
+                call_args = mock_completion.call_args[1]
+                assert "base_url" in call_args
+                assert call_args["base_url"] == "https://custom.endpoint.com"
+
+    @pytest.mark.asyncio
+    async def test_agent_toolPlanner_json_parsing_error(self):
+        """Test agent_toolPlanner handles JSON parsing errors."""
+        prompt_input = ToolPromptInput(
+            prompt="Test prompt",
+            context_instructions="Test context instructions"
+        )
+        
+        # Mock the completion function to return invalid JSON
+        mock_response = MagicMock()
+        mock_response.choices = [MagicMock()]
+        mock_response.choices[0].message.content = 'Invalid JSON response'
+        
+        with patch('activities.tool_activities.completion') as mock_completion:
+            mock_completion.return_value = mock_response
+            
+            activity_env = ActivityEnvironment()
+            with pytest.raises(Exception):  # Should raise JSON parsing error
+                await activity_env.run(
+                    self.tool_activities.agent_toolPlanner,
+                    prompt_input
+                )
+
+    @pytest.mark.asyncio
+    async def test_get_wf_env_vars_default_values(self):
+        """Test get_wf_env_vars with default values."""
+        env_input = EnvLookupInput(
+            show_confirm_env_var_name="SHOW_CONFIRM",
+            show_confirm_default=True
+        )
+        
+        # Clear environment variables
+        with patch.dict(os.environ, {}, clear=True):
+            activity_env = ActivityEnvironment()
+            result = await activity_env.run(
+                self.tool_activities.get_wf_env_vars,
+                env_input
+            )
+            
+            assert isinstance(result, EnvLookupOutput)
+            assert result.show_confirm is True  # default value
+            assert result.multi_goal_mode is True  # default value
+
+    @pytest.mark.asyncio
+    async def test_get_wf_env_vars_custom_values(self):
+        """Test get_wf_env_vars with custom environment values."""
+        env_input = EnvLookupInput(
+            show_confirm_env_var_name="SHOW_CONFIRM",
+            show_confirm_default=True
+        )
+        
+        # Set environment variables
+        with patch.dict(os.environ, {
+            'SHOW_CONFIRM': 'false',
+            'AGENT_GOAL': 'specific_goal'
+        }):
+            activity_env = ActivityEnvironment()
+            result = await activity_env.run(
+                self.tool_activities.get_wf_env_vars,
+                env_input
+            )
+            
+            assert isinstance(result, EnvLookupOutput)
+            assert result.show_confirm is False  # from env var
+            assert result.multi_goal_mode is False  # from env var
+
+    def test_sanitize_json_response(self):
+        """Test JSON response sanitization."""
+        # Test with markdown code blocks
+        response_with_markdown = "```json\n{\"test\": \"value\"}\n```"
+        sanitized = self.tool_activities.sanitize_json_response(response_with_markdown)
+        assert sanitized == '{"test": "value"}'
+        
+        # Test with extra whitespace
+        response_with_whitespace = "  \n{\"test\": \"value\"}  \n"
+        sanitized = self.tool_activities.sanitize_json_response(response_with_whitespace)
+        assert sanitized == '{"test": "value"}'
+
+    def test_parse_json_response_success(self):
+        """Test successful JSON parsing."""
+        json_string = '{"next": "confirm", "tool": "TestTool"}'
+        result = self.tool_activities.parse_json_response(json_string)
+        
+        assert isinstance(result, dict)
+        assert result["next"] == "confirm"
+        assert result["tool"] == "TestTool"
+
+    def test_parse_json_response_failure(self):
+        """Test JSON parsing with invalid JSON."""
+        invalid_json = "Not valid JSON"
+        
+        with pytest.raises(Exception):  # Should raise JSON parsing error
+            self.tool_activities.parse_json_response(invalid_json)
+
+
+class TestDynamicToolActivity:
+    """Test cases for dynamic_tool_activity."""
+
+    @pytest.mark.asyncio
+    async def test_dynamic_tool_activity_sync_handler(self):
+        """Test dynamic tool activity with synchronous handler."""
+        # Mock the activity info and payload converter
+        mock_info = MagicMock()
+        mock_info.activity_type = "TestTool"
+        
+        mock_payload_converter = MagicMock()
+        mock_payload = MagicMock()
+        mock_payload.payload = b'{"test_arg": "test_value"}'
+        mock_payload_converter.from_payload.return_value = {"test_arg": "test_value"}
+        
+        # Mock the handler function
+        def mock_handler(args):
+            return {"result": f"Handled {args['test_arg']}"}
+        
+        with patch('temporalio.activity.info', return_value=mock_info), \
+             patch('temporalio.activity.payload_converter', return_value=mock_payload_converter), \
+             patch('tools.get_handler', return_value=mock_handler):
+            
+            activity_env = ActivityEnvironment()
+            result = await activity_env.run(
+                dynamic_tool_activity,
+                [mock_payload]
+            )
+            
+            assert isinstance(result, dict)
+            assert result["result"] == "Handled test_value"
+
+    @pytest.mark.asyncio
+    async def test_dynamic_tool_activity_async_handler(self):
+        """Test dynamic tool activity with asynchronous handler."""
+        # Mock the activity info and payload converter
+        mock_info = MagicMock()
+        mock_info.activity_type = "AsyncTestTool"
+        
+        mock_payload_converter = MagicMock()
+        mock_payload = MagicMock()
+        mock_payload.payload = b'{"test_arg": "async_test"}'
+        mock_payload_converter.from_payload.return_value = {"test_arg": "async_test"}
+        
+        # Mock the async handler function
+        async def mock_async_handler(args):
+            return {"async_result": f"Async handled {args['test_arg']}"}
+        
+        with patch('temporalio.activity.info', return_value=mock_info), \
+             patch('temporalio.activity.payload_converter', return_value=mock_payload_converter), \
+             patch('tools.get_handler', return_value=mock_async_handler):
+            
+            activity_env = ActivityEnvironment()
+            result = await activity_env.run(
+                dynamic_tool_activity,
+                [mock_payload]
+            )
+            
+            assert isinstance(result, dict)
+            assert result["async_result"] == "Async handled async_test"
+
+
+class TestToolActivitiesIntegration:
+    """Integration tests for ToolActivities in a real Temporal environment."""
+
+    @pytest.mark.asyncio
+    async def test_activities_in_worker(self, client: Client):
+        """Test activities can be registered and executed in a worker."""
+        task_queue_name = str(uuid.uuid4())
+        tool_activities = ToolActivities()
+        
+        # Test get_wf_env_vars activity using ActivityEnvironment
+        env_input = EnvLookupInput(
+            show_confirm_env_var_name="TEST_CONFIRM",
+            show_confirm_default=False
+        )
+        
+        activity_env = ActivityEnvironment()
+        result = await activity_env.run(
+            tool_activities.get_wf_env_vars,
+            env_input
+        )
+        
+        assert isinstance(result, EnvLookupOutput)
+        assert isinstance(result.show_confirm, bool)
+        assert isinstance(result.multi_goal_mode, bool)
+
+
+class TestEdgeCases:
+    """Test edge cases and error handling."""
+    
+    def setup_method(self):
+        """Set up test environment for each test."""
+        self.tool_activities = ToolActivities()
+
+    @pytest.mark.asyncio
+    async def test_agent_validatePrompt_with_empty_conversation_history(self, sample_agent_goal):
+        """Test validation with empty conversation history."""
+        validation_input = ValidationInput(
+            prompt="Test prompt",
+            conversation_history={"messages": []},
+            agent_goal=sample_agent_goal
+        )
+        
+        mock_response = {
+            "validationResult": True,
+            "validationFailedReason": {}
+        }
+        
+        with patch.object(self.tool_activities, 'agent_toolPlanner', new_callable=AsyncMock) as mock_planner:
+            mock_planner.return_value = mock_response
+            
+            activity_env = ActivityEnvironment()
+            result = await activity_env.run(
+                self.tool_activities.agent_validatePrompt,
+                validation_input
+            )
+            
+            assert isinstance(result, ValidationResult)
+            assert result.validationResult == True
+            assert result.validationFailedReason == {}
+
+    @pytest.mark.asyncio
+    async def test_agent_toolPlanner_with_long_prompt(self):
+        """Test toolPlanner with very long prompt."""
+        long_prompt = "This is a very long prompt " * 100
+        tool_prompt_input = ToolPromptInput(
+            prompt=long_prompt,
+            context_instructions="Test context instructions"
+        )
+        
+        # Mock the completion response
+        mock_response = MagicMock()
+        mock_response.choices = [MagicMock()]
+        mock_response.choices[0].message.content = '{"next": "done", "response": "Processed long prompt"}'
+        
+        with patch('activities.tool_activities.completion', return_value=mock_response):
+            activity_env = ActivityEnvironment()
+            result = await activity_env.run(
+                self.tool_activities.agent_toolPlanner,
+                tool_prompt_input
+            )
+            
+            assert isinstance(result, dict)
+            assert result["next"] == "done"
+            assert "Processed long prompt" in result["response"]
+
+    @pytest.mark.asyncio
+    async def test_sanitize_json_with_various_formats(self):
+        """Test JSON sanitization with various input formats."""
+        # Test markdown code blocks
+        markdown_json = "```json\n{\"test\": \"value\"}\n```"
+        result = self.tool_activities.sanitize_json_response(markdown_json)
+        assert result == '{"test": "value"}'
+        
+        # Test with extra whitespace
+        whitespace_json = "   \n  {\"test\": \"value\"}  \n  "
+        result = self.tool_activities.sanitize_json_response(whitespace_json)
+        assert result == '{"test": "value"}'
+        
+        # Test already clean JSON
+        clean_json = '{"test": "value"}'
+        result = self.tool_activities.sanitize_json_response(clean_json)
+        assert result == '{"test": "value"}'
+
+    @pytest.mark.asyncio
+    async def test_parse_json_response_with_invalid_json(self):
+        """Test JSON parsing with invalid JSON."""
+        with pytest.raises(json.JSONDecodeError):
+            self.tool_activities.parse_json_response("Invalid JSON {test: value")
+
+    @pytest.mark.asyncio
+    async def test_get_wf_env_vars_with_various_env_values(self):
+        """Test environment variable parsing with different values."""
+        # Test with "true" string
+        with patch.dict(os.environ, {"TEST_CONFIRM": "true"}):
+            env_input = EnvLookupInput(
+                show_confirm_env_var_name="TEST_CONFIRM",
+                show_confirm_default=False
+            )
+            
+            activity_env = ActivityEnvironment()
+            result = await activity_env.run(
+                self.tool_activities.get_wf_env_vars,
+                env_input
+            )
+            
+            assert result.show_confirm == True
+        
+        # Test with "false" string
+        with patch.dict(os.environ, {"TEST_CONFIRM": "false"}):
+            env_input = EnvLookupInput(
+                show_confirm_env_var_name="TEST_CONFIRM", 
+                show_confirm_default=True
+            )
+            
+            activity_env = ActivityEnvironment()
+            result = await activity_env.run(
+                self.tool_activities.get_wf_env_vars,
+                env_input
+            )
+            
+            assert result.show_confirm == False
+        
+        # Test with missing env var (should use default)
+        with patch.dict(os.environ, {}, clear=True):
+            env_input = EnvLookupInput(
+                show_confirm_env_var_name="MISSING_VAR",
+                show_confirm_default=True
+            )
+            
+            activity_env = ActivityEnvironment()
+            result = await activity_env.run(
+                self.tool_activities.get_wf_env_vars,
+                env_input
+            )
+            
+            assert result.show_confirm == True
\ No newline at end of file
diff --git a/tests/workflowtests/agent_goal_workflow_test.py b/tests/workflowtests/agent_goal_workflow_test.py
index ab67478..6d74fc8 100644
--- a/tests/workflowtests/agent_goal_workflow_test.py
+++ b/tests/workflowtests/agent_goal_workflow_test.py
@@ -1,9 +1,19 @@
+import uuid
 from temporalio.client import Client, WorkflowExecutionStatus
 from temporalio.worker import Worker
+from temporalio import activity
 import concurrent.futures
 from temporalio.testing import WorkflowEnvironment
 from api.main import get_initial_agent_goal
-from models.data_types import AgentGoalWorkflowParams, CombinedInput
+from models.data_types import (
+    AgentGoalWorkflowParams, 
+    CombinedInput,
+    ValidationResult,
+    ValidationInput,
+    EnvLookupOutput,
+    EnvLookupInput,
+    ToolPromptInput
+)
 from workflows.agent_goal_workflow import AgentGoalWorkflow
 from activities.tool_activities import ToolActivities, dynamic_tool_activity
 from unittest.mock import patch
@@ -31,15 +41,41 @@ async def test_flight_booking(client: Client):
         # Create the test environment
         #env = await WorkflowEnvironment.start_local()
         #client = env.client
-        task_queue_name = "agent-ai-workflow"
-        workflow_id = "agent-workflow"
+        task_queue_name = str(uuid.uuid4())
+        workflow_id = str(uuid.uuid4())
+
+        # Create mock activity functions with proper signatures
+        @activity.defn(name="get_wf_env_vars")
+        async def mock_get_wf_env_vars(input: EnvLookupInput) -> EnvLookupOutput:
+            return EnvLookupOutput(
+                show_confirm=True,
+                multi_goal_mode=True
+            )
+            
+        @activity.defn(name="agent_validatePrompt")
+        async def mock_agent_validatePrompt(validation_input: ValidationInput) -> ValidationResult:
+            return ValidationResult(
+                validationResult=True,
+                validationFailedReason={}
+            )
+            
+        @activity.defn(name="agent_toolPlanner")
+        async def mock_agent_toolPlanner(input: ToolPromptInput) -> dict:
+            return {
+                "next": "done",
+                "response": "Test response from LLM"
+            }
 
         with concurrent.futures.ThreadPoolExecutor(max_workers=100) as activity_executor:        
             worker = Worker(
                 client, 
                 task_queue=task_queue_name,
                 workflows=[AgentGoalWorkflow],
-                activities=[ToolActivities.agent_validatePrompt, ToolActivities.agent_toolPlanner, ToolActivities.get_wf_env_vars, dynamic_tool_activity],
+                activities=[
+                    mock_get_wf_env_vars,
+                    mock_agent_validatePrompt,
+                    mock_agent_toolPlanner
+                ],
                 activity_executor=activity_executor,
             )