system context

Steve Androulakis
2024-12-31 12:19:53 -08:00
parent c6b71b8ffa
commit 4286be2e5d
4 changed files with 72 additions and 57 deletions

View File

@@ -2,7 +2,12 @@
Multi-Turn Chat using an Entity Workflow. The workflow runs forever unless explicitly ended. The workflow continues as new after a configurable number of chat turns to keep the prompt size small and the Temporal event history small. Each continued-as-new workflow receives a summary of the conversation history so far for context.
To run, first see `samples-python` [README.md](../../README.md) and `ollama` [README.md](../README.md) for prerequisites specific to this sample. Once set up, run the following from this directory:
## Setup
* Install [Ollama](https://ollama.com) and the Mistral model (`ollama run mistral`).
* Install and run the Temporal server by following the instructions in the [Temporal documentation](https://learn.temporal.io/getting_started/python/dev_environment/#set-up-a-local-temporal-service-for-development-with-temporal-cli).
* Install the dependencies: `poetry install`
## Running the example
1. Run the worker: `poetry run python run_worker.py`
2. In another terminal, run the client with a prompt.
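
For reference, the heart of the pattern described above is a single `continue_as_new` call that carries the running summary and any queued prompts into the next workflow run. A minimal sketch, with argument names taken from this sample's `OllamaParams` (the workflow diff below has the full version):

```python
# Sketch: after enough turns, summarize and hand off to a fresh run.
# self.conversation_summary holds the latest summary from the model;
# self.prompt_queue holds any prompts that arrived in the meantime.
if len(self.conversation_history) >= self.continue_as_new_per_turns:
    workflow.continue_as_new(
        args=[
            OllamaParams(
                conversation_summary=self.conversation_summary,
                prompt_queue=self.prompt_queue,
            )
        ]
    )
```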

View File

@@ -1,19 +1,26 @@
from dataclasses import dataclass
from temporalio import activity
from ollama import chat, ChatResponse
@dataclass
class OllamaPromptInput:
prompt: str
context_instructions: str
class OllamaActivities:
@activity.defn
def prompt_ollama(self, prompt: str) -> str:
def prompt_ollama(self, input: OllamaPromptInput) -> str:
model_name = 'mistral'
messages = [
{
'role': 'system',
'content': input.context_instructions,
},
{
'role': 'user',
'content': prompt
'content': input.prompt,
}
]
# Call ollama's chat function
response: ChatResponse = chat(model=model_name, messages=messages)
# Return the model's text response
return response.message.content
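
The `run_worker.py` referenced in the README is not part of this diff. Because `prompt_ollama` is a synchronous activity, the worker must supply an activity executor; the following is only a sketch of plausible registration code (the connection address and the `workflow` module name are assumptions):

```python
import asyncio
from concurrent.futures import ThreadPoolExecutor

from temporalio.client import Client
from temporalio.worker import Worker

from activities import OllamaActivities
from workflow import EntityOllamaWorkflow  # module name assumed


async def main():
    client = await Client.connect("localhost:7233")  # address assumed
    activities = OllamaActivities()
    worker = Worker(
        client,
        task_queue="ollama-task-queue",
        workflows=[EntityOllamaWorkflow],
        activities=[activities.prompt_ollama],
        # prompt_ollama is synchronous, so a thread pool is required to run it
        activity_executor=ThreadPoolExecutor(max_workers=4),
    )
    await worker.run()


if __name__ == "__main__":
    asyncio.run(main())
```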

View File

@@ -14,7 +14,7 @@ async def main(prompt):
# Sends a signal to the workflow (and starts it if needed)
await client.start_workflow(
EntityOllamaWorkflow.run,
OllamaParams(None, None),
OllamaParams(None, None), # or pass in custom summary/prompt_queue if desired
id=workflow_id,
task_queue="ollama-task-queue",
start_signal="user_prompt",

View File

@@ -6,7 +6,8 @@ from typing import Deque, List, Optional, Tuple
from temporalio import workflow
with workflow.unsafe.imports_passed_through():
from activities import OllamaActivities
# Import the updated OllamaActivities and the new dataclass
from activities import OllamaActivities, OllamaPromptInput
@dataclass
@@ -18,7 +19,6 @@ class OllamaParams:
@workflow.defn
class EntityOllamaWorkflow:
def __init__(self) -> None:
# List to store conversation history
self.conversation_history: List[Tuple[str, str]] = []
self.prompt_queue: Deque[str] = deque()
self.conversation_summary: Optional[str] = None
@@ -26,16 +26,11 @@ class EntityOllamaWorkflow:
self.chat_ended: bool = False
@workflow.run
async def run(
self,
params: OllamaParams,
) -> str:
async def run(self, params: OllamaParams) -> str:
if params and params.conversation_summary:
self.conversation_history.append(
("conversation_summary", params.conversation_summary)
)
self.conversation_summary = params.conversation_summary
if params and params.prompt_queue:
@@ -44,71 +39,79 @@ class EntityOllamaWorkflow:
while True:
workflow.logger.info("Waiting for prompts...")
# Wait for a chat message (signal) or for the chat to end
await workflow.wait_condition(
lambda: bool(self.prompt_queue) or self.chat_ended
)
if self.prompt_queue:
# Fetch next user prompt and add to conversation history
# Get user's prompt
prompt = self.prompt_queue.popleft()
self.conversation_history.append(("user", prompt))
# Build prompt + context
context_instructions, actual_prompt = self.prompt_with_history(prompt)
workflow.logger.info("Prompt: " + prompt)
# Send prompt to Ollama
# Pass a single input object
prompt_input = OllamaPromptInput(
prompt=actual_prompt,
context_instructions=context_instructions,
)
# Call activity with one argument
response = await workflow.execute_activity_method(
OllamaActivities.prompt_ollama,
self.prompt_with_history(prompt),
prompt_input,
schedule_to_close_timeout=timedelta(seconds=20),
)
workflow.logger.info(f"{response}")
# Append the response to the conversation history
workflow.logger.info(f"Ollama response: {response}")
self.conversation_history.append(("response", response))
# Continue as new every x conversational turns to avoid event
# history size getting too large. This is also to avoid the
# prompt (with conversational history) getting too large for
# Ollama.
# We summarize the chat to date and use that as input to the
# new workflow
# Continue as new after X turns
if len(self.conversation_history) >= self.continue_as_new_per_turns:
# Summarize the conversation to date using Ollama
# Summarize conversation
summary_context, summary_prompt = self.prompt_summary_with_history()
summary_input = OllamaPromptInput(
prompt=summary_prompt,
context_instructions=summary_context,
)
self.conversation_summary = await workflow.start_activity_method(
OllamaActivities.prompt_ollama,
self.prompt_summary_from_history(),
summary_input,
schedule_to_close_timeout=timedelta(seconds=20),
)
workflow.logger.info(
"Continuing as new due to %i conversational turns."
"Continuing as new after %i turns."
% self.continue_as_new_per_turns,
)
workflow.continue_as_new(
args=[
OllamaParams(
self.conversation_summary,
self.prompt_queue,
conversation_summary=self.conversation_summary,
prompt_queue=self.prompt_queue,
)
]
)
continue
# If end chat signal was sent
# Handle end of chat
if self.chat_ended:
# The workflow might be continued as new without any
# chat to summarize, so only call Ollama if there
# is more than the previous summary in the history.
if len(self.conversation_history) > 1:
# Summarize the conversation to date using Ollama
# Summarize conversation
summary_context, summary_prompt = self.prompt_summary_with_history()
summary_input = OllamaPromptInput(
prompt=summary_prompt,
context_instructions=summary_context,
)
self.conversation_summary = await workflow.start_activity_method(
OllamaActivities.prompt_ollama,
self.prompt_summary_from_history(),
summary_input,
schedule_to_close_timeout=timedelta(seconds=20),
)
@@ -116,16 +119,13 @@ class EntityOllamaWorkflow:
"Chat ended. Conversation summary:\n"
+ f"{self.conversation_summary}"
)
return f"{self.conversation_history}"
@workflow.signal
async def user_prompt(self, prompt: str) -> None:
# Chat ended but the workflow is waiting for a chat summary to be generated
if self.chat_ended:
workflow.logger.warning(f"Message dropped because the chat has ended: {prompt}")
return
self.prompt_queue.append(prompt)
@workflow.signal
@@ -140,25 +140,28 @@ class EntityOllamaWorkflow:
def get_summary_from_history(self) -> Optional[str]:
return self.conversation_summary
# Helper method used in prompts to Ollama
# Helper: generate text of the entire conversation so far
def format_history(self) -> str:
return " ".join(f"{text}" for _, text in self.conversation_history)
# Create the prompt given to Ollama for each conversational turn
def prompt_with_history(self, prompt: str) -> str:
# Return (context_instructions, prompt)
def prompt_with_history(self, prompt: str) -> tuple[str, str]:
history_string = self.format_history()
return (
f"Here is the conversation history: {history_string} Please add "
+ "a few sentence response to the prompt in plain text sentences. "
+ "Don't editorialize or add metadata like response. Keep the "
+ f"text a plain explanation based on the history. Prompt: {prompt}"
context_instructions = (
f"Here is the conversation history: {history_string} "
"Please add a few sentence response in plain text sentences. "
"Don't editorialize or add metadata. "
"Keep the text a plain explanation based on the history."
)
return (context_instructions, prompt)
# Create the prompt to Ollama to summarize the conversation history
def prompt_summary_from_history(self) -> str:
# Return (context_instructions, prompt) for summarizing the conversation
def prompt_summary_with_history(self) -> tuple[str, str]:
history_string = self.format_history()
return (
"Here is the conversation history between a user and a chatbot: "
+ f"{history_string} -- Please produce a two sentence summary of "
+ "this conversation."
context_instructions = (
f"Here is the conversation history between a user and a chatbot: {history_string}"
)
actual_prompt = (
"Please produce a two sentence summary of this conversation."
)
return (context_instructions, actual_prompt)
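
Finally, the `get_summary_from_history` method shown earlier suggests the running summary can be read back while the chat is open. Assuming it is decorated as a query (its decorator falls outside the hunk shown above), a client-side read might look like:

```python
# Hedged sketch: read the running summary from a client.
handle = client.get_workflow_handle("ollama-chat")  # id assumed
summary = await handle.query(EntityOllamaWorkflow.get_summary_from_history)
print(summary)
```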