pre-warm ollama local model on initialization

Steve Androulakis
2025-02-28 07:31:44 -06:00
parent 7fa75395d8
commit 61147136fd
2 changed files with 132 additions and 32 deletions


@@ -14,11 +14,11 @@ from shared.config import get_temporal_client, TEMPORAL_TASK_QUEUE
async def main():
    # Load environment variables
    load_dotenv(override=True)

    # Print LLM configuration info
    llm_provider = os.environ.get("LLM_PROVIDER", "openai").lower()
    print(f"Worker will use LLM provider: {llm_provider}")

    # Create the client
    client = await get_temporal_client()
@@ -26,6 +26,29 @@ async def main():
    activities = ToolActivities()
    print(f"ToolActivities initialized with LLM provider: {llm_provider}")

    # If using Ollama, pre-load the model to avoid cold start latency
    if llm_provider == "ollama":
        print("\n======== OLLAMA MODEL INITIALIZATION ========")
        print("Ollama models need to be loaded into memory on first use.")
        print("This may take 30+ seconds depending on your hardware and model size.")
        print("Please wait while the model is being loaded...")

        # This call will load the model and measure initialization time
        success = activities.warm_up_ollama()

        if success:
            print("===========================================================")
            print("✅ Ollama model successfully pre-loaded and ready for requests!")
            print("===========================================================\n")
        else:
            print("===========================================================")
            print("⚠️ Ollama model pre-loading failed. The worker will continue,")
            print("but the first actual request may experience a delay while")
            print("the model is loaded on-demand.")
            print("===========================================================\n")

    print("Worker ready to process tasks!")

    # Run the worker
    with concurrent.futures.ThreadPoolExecutor(max_workers=100) as activity_executor:
        worker = Worker(
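The `warm_up_ollama` implementation lives in ToolActivities, the other changed file, which is not shown in this hunk. As a rough sketch of what such a pre-warm call typically does (the model name, env-var names, and `requests`-based HTTP call below are assumptions, not the commit's actual code), it issues one throwaway generation against the local Ollama HTTP API, which forces the model into memory and lets the worker time and report the load:

import os
import time

import requests  # assumes requests is available in the worker environment


class ToolActivities:
    def warm_up_ollama(self) -> bool:
        """Send a trivial prompt so Ollama loads the model into memory."""
        # Hypothetical env vars for illustration; the real code may configure
        # the model and endpoint differently.
        model = os.environ.get("OLLAMA_MODEL", "llama3")
        base_url = os.environ.get("OLLAMA_BASE_URL", "http://localhost:11434")
        start = time.time()
        try:
            # /api/generate loads the model if it is not already resident;
            # a one-word prompt keeps the warm-up call cheap.
            response = requests.post(
                f"{base_url}/api/generate",
                json={"model": model, "prompt": "Hello", "stream": False},
                timeout=120,
            )
            response.raise_for_status()
            print(f"Model '{model}' loaded in {time.time() - start:.1f}s")
            return True
        except requests.RequestException as exc:
            print(f"Ollama warm-up failed: {exc}")
            return False

Returning a bool rather than raising lets the caller in main() degrade gracefully, matching the diff above: on failure the worker still starts, and the model simply loads on demand at the first real request.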