RAGOpt uses LangChain’s LLMChain by default; agentic RAG requires explicit configuration (see Agentic RAG (Experimental) below).
macOS users may need to run:

export DYLD_LIBRARY_PATH=$(brew --prefix libomp)/lib:$DYLD_LIBRARY_PATH

before using RAGWorkflow with the hybrid retriever, to avoid OpenMP errors.

Overview
RAGOpt’s RAGWorkflow class lets you build custom RAG pipelines from modular components:
- Parser - Load and parse documents
- Chunker - Split documents into chunks
- Indexer - Store embeddings in vector database
- Retriever - Retrieve relevant documents
- Reranker (Optional) - Rerank retrieved documents
- LLM - Generate responses
Complete Example
from rag_opt.rag import Parser, Splitter, Indexer, RAGWorkflow
from rag_opt import (
    init_embeddings,
    init_chat_model,
    init_reranker,
    init_vectorstore,
)
llm = init_chat_model(
model="gpt-3.5-turbo",
model_provider="openai",
api_key=OPENAI_API_KEY
)
embeddings = init_embeddings(
    model="sentence-transformers/all-MiniLM-L6-v2",
    model_provider="huggingface",
    api_key=HUGGINGFACE_API_KEY
)
vector_store = init_vectorstore(
provider="chroma",
embeddings=embeddings,
collection_name="my_knowledge_base"
)
reranker = init_reranker(
provider="flashrank",
model_name="ms-marco-MiniLM-L-12-v2",
)
# Load and index documents
# Path to your data folder (a single file path works too)
parser = Parser(path=MY_DATA_PATH)
documents = parser.load_docs()
text_splitter = Splitter(
chunk_size=1000,
chunk_overlap=200,
splitting_type="recursive_character"
)
chunks = text_splitter.split_documents(documents[0:10])  # index only the first 10 documents for this example
indexer = Indexer(
chunk_size=1000,
chunk_overlap=200,
vector_store=vector_store
)
indexer.store(chunks)
# Create workflow
rag = RAGWorkflow(
embeddings=embeddings,
vector_store=vector_store,
llm=llm,
reranker=reranker,
retrieval_config={
"search_type": "mmr",
"k": 5,
"fetch_k": 20
}
)
# Query
result = rag.get_answer("What are the key features of the product?")
print(f"Answer: {result.answer}")
print(f"Sources: {len(result.contexts)}")
print(f"Latency: {result.latency.total:.2f}s")
Component Initialization
Embeddings
from rag_opt import init_embeddings
# HuggingFace (local)
embeddings = init_embeddings(
model="sentence-transformers/all-MiniLM-L6-v2",
model_provider="huggingface"
)
# OpenAI
embeddings = init_embeddings(
model="text-embedding-3-small",
model_provider="openai",
api_key="your-openai-key"
)
# Cohere
embeddings = init_embeddings(
model="embed-english-v3.0",
model_provider="cohere",
api_key="your-cohere-key"
)
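All three providers return the same kind of object, so you can sanity-check an embedding model directly before wiring it into a workflow. A minimal sketch, assuming the returned object follows LangChain’s Embeddings interface (embed_query / embed_documents):

# Embed a single query; returns one vector as a list of floats
vector = embeddings.embed_query("What is RAG?")
print(len(vector))  # dimensionality, e.g. 384 for all-MiniLM-L6-v2

# Embed a batch of documents in one call
vectors = embeddings.embed_documents(["first doc", "second doc"])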
LLM
from rag_opt import init_chat_model
# OpenAI
llm = init_chat_model(
model="gpt-4",
model_provider="openai",
api_key="your-api-key",
temperature=0.7
)
# HuggingFace
llm = init_chat_model(
model="meta-llama/Llama-2-7b-chat-hf",
model_provider="huggingface",
api_key="your-hf-token"
)
# Anthropic
llm = init_chat_model(
model="claude-3-sonnet-20240229",
model_provider="anthropic",
api_key="your-anthropic-key"
)
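The returned chat model can also be called outside the workflow for a quick smoke test. A sketch, assuming a LangChain-style chat model (invoke returning a message with a .content attribute):

# One-off call outside the RAG pipeline
response = llm.invoke("Explain retrieval-augmented generation in one sentence.")
print(response.content)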
Vector Store
from rag_opt import init_vectorstore
# Chroma (local)
vector_store = init_vectorstore(
provider="chroma",
embeddings=embeddings,
collection_name="my_docs"
)
# FAISS (local)
vector_store = init_vectorstore(
provider="faiss",
embeddings=embeddings
)
# Qdrant
vector_store = init_vectorstore(
    provider="qdrant",
    embeddings=embeddings,
    url="http://localhost:6333",
    collection_name="my_collection"
)
# Pinecone
vector_store = init_vectorstore(
    provider="pinecone",
    embeddings=embeddings,
    api_key="your-pinecone-key",
    index_name="my-index",
    environment="us-west1-gcp"
)
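Once documents are indexed (see Document Loading & Indexing below), you can query the store directly to verify retrieval before building the full workflow. A sketch, assuming a LangChain-style VectorStore with similarity_search:

# Direct retrieval check; only returns results after indexer.store(chunks)
docs = vector_store.similarity_search("key features of the product", k=3)
for doc in docs:
    print(doc.page_content[:100])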
Reranker (Optional)
from rag_opt import init_reranker
# Cross-Encoder (HuggingFace)
reranker = init_reranker(
provider="huggingface",
model_name="BAAI/bge-reranker-base"
)
# Cohere
reranker = init_reranker(
provider="cohere",
api_key="your-cohere-key",
model_name="rerank-english-v3.0"
)
# FlashRank (fast, local)
reranker = init_reranker(
provider="flashrank",
model_name="ms-marco-MiniLM-L-12-v2"
)
# Jina AI
reranker = init_reranker(
provider="jina",
api_key="your-jina-key",
model_name="jina-reranker-v1-base-en"
)
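Whichever provider you choose, the reranker plugs into the pipeline the same way: pass it to RAGWorkflow via the reranker argument, as in the complete example above:

rag = RAGWorkflow(
    embeddings=embeddings,
    vector_store=vector_store,
    llm=llm,
    reranker=reranker,  # retrieved chunks are reranked before generation
    retrieval_config={"search_type": "mmr", "k": 5, "fetch_k": 20}
)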
Document Loading & Indexing
from rag_opt.rag import Parser, Indexer, Splitter
from rag_opt import init_vectorstore
# embeddings initialized as in the Embeddings section above
vector_store = init_vectorstore(
    provider="faiss",
    embeddings=embeddings
)
# Load documents
parser = Parser(
path="./data",
glob="**/*.csv",
include_sub_dir=True,
use_multithreading=True
)
documents = parser.load_docs()
# Or from DataFrame
import pandas as pd
df = pd.read_csv("data.csv")
documents = Parser.from_df(df, page_content_column="text")
# Chunk and store
splitter = Splitter(
chunk_size=1000,
chunk_overlap=200,
splitting_type="recursive_character"
)
chunks = splitter.split_documents(documents)
indexer = Indexer(
chunk_size=1000,
chunk_overlap=200,
vector_store=vector_store
)
indexer.store(chunks)
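Before indexing a large corpus it can help to eyeball the split output. A small sketch, assuming chunks are LangChain-style Documents with page_content and metadata:

print(f"{len(documents)} documents -> {len(chunks)} chunks")
print(chunks[0].page_content[:200])
print(chunks[0].metadata)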
Usage
Single Query
result = rag.get_answer("What is the capital of France?")
print(f"Answer: {result.answer}")
print(f"Contexts: {len(result.contexts)}")
print(f"Cost: ${result.cost.total}")
print(f"Latency: {result.latency.total:.2f}s")
Batch Processing
from rag_opt.dataset import TrainDataset, TrainDatasetItem
from rag_opt.rag import RAGWorkflow
dataset = TrainDataset(items=[
TrainDatasetItem(
question="What is RAG?",
answer="Retrieval Augmented Generation...",
contexts=["..."]
),
TrainDatasetItem(
question="How does vector search work?",
answer="Vector search uses...",
contexts=["..."]
)
])
rag = RAGWorkflow(
embeddings=embeddings,
vector_store=vector_store,
llm=llm,
reranker=reranker,
retrieval_config={
"search_type": "mmr",
"k": 5,
"fetch_k": 20
}
)
eval_dataset = rag.get_batch_answers(dataset)
eval_dataset.to_json("results.json")
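Since to_json writes the batch results to disk, the file can be inspected with the standard library. A sketch, assuming results.json contains standard JSON (the exact schema is RAGOpt’s):

import json

with open("results.json") as f:
    results = json.load(f)
print(results)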
Using Optimized Configuration
from rag_opt import RAGConfig, init_embeddings, init_chat_model, init_vectorstore
from rag_opt.rag import RAGWorkflow
# Load saved config
config = RAGConfig.from_json("./best_rag_config.json")
# Initialize from config
embeddings = init_embeddings(
model=config.embedding_model,
model_provider=config.embedding_provider
)
llm = init_chat_model(
model=config.llm_model,
model_provider=config.llm_provider
)
vector_store = init_vectorstore(
provider=config.vectorstore_provider,
embeddings=embeddings
)
rag = RAGWorkflow(
embeddings=embeddings,
vector_store=vector_store,
llm=llm,
retrieval_config={
"search_type": config.search_type,
"k": config.top_k
}
)
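A workflow built from a saved config is queried exactly like a hand-wired one:

result = rag.get_answer("What are the key features of the product?")
print(f"Answer: {result.answer}")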
Agentic RAG (Experimental)
Agentic RAG is experimental and requires explicit agent initialization.
rag = RAGWorkflow(
embeddings=embeddings,
vector_store=vector_store,
llm=llm,
retrieval_config={
"search_type": config.search_type,
"k": config.top_k
}
)
result = rag.get_agentic_answer("What are the key features of the product?")
print(f"Answer: {result.answer}")
print(f"Sources: {len(result.contexts)}")
print(f"Latency: {result.latency.total:.2f}s")