feat: LiteLLM integration

2026-03-15 22:18:09 +01:00 · 2025-05-20 13:53:20 -03:00
parent 847f4bbaef
commit dcb6271c23
8 changed files with 1337 additions and 868 deletions
--- a/scripts/run_ollama.py
+++ b/scripts/run_ollama.py
@@ -1,23 +0,0 @@
-from ollama import chat, ChatResponse
-
-
-def main():
-    model_name = "mistral"
-
-    # The messages to pass to the model
-    messages = [
-        {
-            "role": "user",
-            "content": "Why is the sky blue?",
-        }
-    ]
-
-    # Call ollama's chat function
-    response: ChatResponse = chat(model=model_name, messages=messages)
-
-    # Print the full message content
-    print(response.message.content)
-
-
-if __name__ == "__main__":
-    main()
--- a/scripts/run_worker.py
+++ b/scripts/run_worker.py
@@ -17,18 +17,18 @@ async def main():
    load_dotenv(override=True)

    # Print LLM configuration info
-    llm_provider = os.environ.get("LLM_PROVIDER", "openai").lower()
-    print(f"Worker will use LLM provider: {llm_provider}")
+    llm_model = os.environ.get("LLM_MODEL", "openai/gpt-4")
+    print(f"Worker will use LLM model: {llm_model}")

    # Create the client
    client = await get_temporal_client()

-    # Initialize the activities class once with the specified LLM provider
+    # Initialize the activities class
    activities = ToolActivities()
-    print(f"ToolActivities initialized with LLM provider: {llm_provider}")
+    print(f"ToolActivities initialized with LLM model: {llm_model}")

    # If using Ollama, pre-load the model to avoid cold start latency
-    if llm_provider == "ollama":
+    if llm_model.startswith("ollama"):
        print("\n======== OLLAMA MODEL INITIALIZATION ========")
        print("Ollama models need to be loaded into memory on first use.")
        print("This may take 30+ seconds depending on your hardware and model size.")
@@ -51,8 +51,6 @@ async def main():
    print("Worker ready to process tasks!")
    logging.basicConfig(level=logging.WARN)

-
-
    # Run the worker
    with concurrent.futures.ThreadPoolExecutor(max_workers=100) as activity_executor:
        worker = Worker(