Code Examples

Tool Calling with orichain.llm

Orichain provides a consistent tool-calling interface in both LLM and AsyncLLM classes. Tool calling works seamlessly with both streaming and non-streaming responses.

Thanks to the unified design, you only need to change the model_name, provider and authentication parameters - the rest of your code remains unchanged.

import asyncio
import os

from dotenv import load_dotenv
from orichain.llm import AsyncLLM  # Use LLM for synchronous usage

load_dotenv()

# JSON-Schema-style description of the arguments `get_weather` accepts:
# a single required string field, "city".
weather_parameters = {
    "type": "object",
    "properties": {
        "city": {
            "type": "string",
            "description": "The city name to fetch the weather for.",
        },
    },
    "required": ["city"],
}

# Tool definitions handed to the LLM call. Each entry carries a name,
# a human-readable description, and the parameter schema.
tools = [
    dict(
        name="get_weather",
        description="Retrieve the current weather for a given city.",
        parameters=weather_parameters,
    ),
]

# Client for the chosen provider. To switch providers, change only
# model_name, provider, and the authentication parameters.
llm = AsyncLLM(
    model_name="gpt-5-mini",
    provider="OpenAI",
    api_key=os.getenv("OPENAI_KEY"),
)


async def main() -> None:
    """Ask the model a weather question and print the tool calls it requests."""
    # `await` is only valid inside a coroutine, so the call lives in `main()`
    # instead of at module level (where it is a SyntaxError in a plain script).
    response = await llm(
        user_message="What’s the weather in Berlin?",
        system_prompt="You are a helpful assistant that provides weather updates.",
        tools=tools,
        tool_choice="auto",  # Options: "auto", "required", "none", or a specific tool name
    )

    print(response.get("tools"))


if __name__ == "__main__":
    asyncio.run(main())

Output:

[
    {
        "id": "call_wCYarGvab6vjeTwyHPPuVzod",
        "function": {
            "arguments": {"city": "Berlin"},
            "name": "get_weather"
        }
    }
]

Using Orichain in a Production-Ready FastAPI Application

The following example demonstrates how to integrate Orichain with a FastAPI application for a production-grade setup. It shows how to use embeddings, a knowledge base, and an LLM together to build a retrieval-augmented chatbot with both streaming and non-streaming responses.

from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse, Response, StreamingResponse

from orichain.embeddings import AsyncEmbeddingModel
from orichain.knowledge_base import AsyncKnowledgeBase
from orichain.llm import AsyncLLM

import os
import art
from dotenv import load_dotenv
from typing import Dict

# Load environment variables (python-dotenv) before any key is read below.
load_dotenv()

# Both OpenAI-backed clients use the same key, so look it up once.
openai_api_key = os.getenv("OPENAI_KEY")

# Embedding model: turns the user's message into a query vector.
embedding_model = AsyncEmbeddingModel(
    provider="OpenAI",
    model_name="text-embedding-ada-002",
    api_key=openai_api_key,
)

# Vector database manager (Pinecone): fill in your own index and namespace.
knowledge_base_manager = AsyncKnowledgeBase(
    vector_db_type="pinecone",
    index_name="<set your index name>",
    namespace="<set your namespace>",
    api_key=os.getenv("PINECONE_KEY"),
)

# LLM that generates the final answer.
llm = AsyncLLM(
    provider="OpenAI",
    model_name="gpt-5-mini",
    api_key=openai_api_key,
)

# docs_url / redoc_url are set to None; per the FastAPI docs this turns off
# the built-in interactive documentation pages.
app = FastAPI(docs_url=None, redoc_url=None)

@app.post("/generative_response")
async def generate(request: Request) -> Response:
    """Answer a user message using context retrieved from the knowledge base.

    Reads these keys from the JSON request body:
        user_message: the text to embed and answer.
        prev_pairs: forwarded to the LLM as ``chat_hist``.
        metadata: optional object; a truthy ``stream`` entry selects the
            streaming response.

    Returns:
        A ``StreamingResponse`` (``text/event-stream``) when streaming is
        requested, otherwise a ``JSONResponse`` wrapping the LLM result.
        If the embedding step returns a dict, or the retrieval step returns
        a dict containing ``"error"``, that dict is returned as JSON instead.
    """
    # Parse incoming request
    request_json = await request.json()

    user_message = request_json.get("user_message")
    prev_pairs = request_json.get("prev_pairs")
    # `metadata` is optional: fall back to an empty dict so the later
    # `.get("stream")` cannot fail with AttributeError on None.
    metadata = request_json.get("metadata") or {}

    # Generate embeddings for the user query
    user_message_vector = await embedding_model(user_message=user_message)

    # A dict here is passed straight back to the client
    # (presumably an error payload from the embedding model).
    if isinstance(user_message_vector, dict):
        return JSONResponse(user_message_vector)

    # Retrieve relevant chunks from the knowledge base
    retrieved_chunks = await knowledge_base_manager(
        user_message_vector=user_message_vector,
        num_of_chunks=5,
    )

    if isinstance(retrieved_chunks, dict) and "error" in retrieved_chunks:
        return JSONResponse(retrieved_chunks)

    # Convert retrieved data into plain text list
    matched_sentence = convert_to_text_list(retrieved_chunks)
    # (Define `convert_to_text_list` to process KB output into a list of strings)

    # Join outside the f-string: a backslash inside an f-string replacement
    # field is a SyntaxError on Python versions before 3.12.
    context = "\n\n".join(matched_sentence)
    system_prompt = f"""As a helpful, engaging, and friendly chatbot, answer the user's query based on the following context:
    <data>
    {context}
    </data>"""

    # Streaming response
    if metadata.get("stream"):
        return StreamingResponse(
            llm.stream(
                request=request,
                user_message=user_message,
                matched_sentence=matched_sentence,
                system_prompt=system_prompt,
                chat_hist=prev_pairs,
            ),
            headers={
                "Content-Type": "text/event-stream",
                "Cache-Control": "no-cache",
                "X-Accel-Buffering": "no",
            },
            media_type="text/event-stream",
        )

    # Non-streaming response
    llm_response = await llm(
        request=request,
        user_message=user_message,
        matched_sentence=matched_sentence,
        system_prompt=system_prompt,
        chat_hist=prev_pairs,
    )

    return JSONResponse(llm_response)

# Module-level statement: the banner is printed as soon as this module is loaded.
startup_banner = art.text2art("Server has started!", font="small")
print(startup_banner)