feat: LiteLLM integration

This commit is contained in:
Jonathan Muszkat
2025-05-20 13:53:20 -03:00
committed by Steve Androulakis
parent 847f4bbaef
commit dcb6271c23
8 changed files with 1337 additions and 868 deletions

View File

@@ -1,23 +0,0 @@
from ollama import chat, ChatResponse
def main():
    """Ask the "mistral" model one fixed question via ollama's chat() and print the reply."""
    # Single-turn conversation: exactly one user message is sent.
    prompt = [{"role": "user", "content": "Why is the sky blue?"}]

    # chat() receives the model name and the message list as keyword arguments.
    reply: ChatResponse = chat(model="mistral", messages=prompt)

    # Only the text content of the returned message is written to stdout.
    print(reply.message.content)


if __name__ == "__main__":
    main()

View File

@@ -17,18 +17,18 @@ async def main():
load_dotenv(override=True)
# Print LLM configuration info
llm_provider = os.environ.get("LLM_PROVIDER", "openai").lower()
print(f"Worker will use LLM provider: {llm_provider}")
llm_model = os.environ.get("LLM_MODEL", "openai/gpt-4")
print(f"Worker will use LLM model: {llm_model}")
# Create the client
client = await get_temporal_client()
# Initialize the activities class once with the specified LLM provider
# Initialize the activities class
activities = ToolActivities()
print(f"ToolActivities initialized with LLM provider: {llm_provider}")
print(f"ToolActivities initialized with LLM model: {llm_model}")
# If using Ollama, pre-load the model to avoid cold start latency
if llm_provider == "ollama":
if llm_model.startswith("ollama"):
print("\n======== OLLAMA MODEL INITIALIZATION ========")
print("Ollama models need to be loaded into memory on first use.")
print("This may take 30+ seconds depending on your hardware and model size.")
@@ -51,8 +51,6 @@ async def main():
print("Worker ready to process tasks!")
logging.basicConfig(level=logging.WARN)
# Run the worker
with concurrent.futures.ThreadPoolExecutor(max_workers=100) as activity_executor:
worker = Worker(