from langchain.schema import Document
from rag_opt import init_chat_model
from rag_opt.rag import DatasetGenerator, Parser

# Example: build a dataset with DatasetGenerator from either a Parser
# or an explicit list of documents.

# Option 1: use a custom parser rooted at a local data directory.
parser = Parser(path="./data")
# NOTE(review): `docs` is not used further down; it is kept only to show
# how documents can be loaded through the parser.
docs = parser.load_docs()

# Option 2: provide documents directly.
documents = [
    Document(page_content="Your text here..."),
    Document(page_content="More content..."),
]

# Chat model handed to the generator. "sk-***" is a placeholder;
# substitute a real key at runtime and never commit it.
llm = init_chat_model(
    model="gpt-3.5-turbo",
    model_provider="openai",
    api_key="sk-***",
)

generator = DatasetGenerator(
    llm=llm,
    parser=parser,  # or pass documents to generate()
)

# Both a parser (above) and explicit source documents are supplied here.
# NOTE(review): which one takes precedence is not visible from this
# script; confirm against DatasetGenerator.generate.
dataset = generator.generate(
    n=5,
    source_docs=documents,
)