You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.document_loaders import DirectoryLoader
from ragas.testset.generator import TestsetGenerator
import os
# Minimal reproduction script: write a single small text file to disk, load it
# through langchain's DirectoryLoader, and ask ragas' TestsetGenerator for a
# two-item synthetic test set.
directory = "../test-directory"
file_content = """Ted is 37 years old and has a birthmark on his lower thigh. His favorite food is spaghetti with meatballs and his last vacation was to Punta Gorda. He is engaged to be married next year, and enjoys watching the Cincinnati Bengals play in the NFL."""

# exist_ok=True creates the directory only when it is missing, without the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(directory, exist_ok=True)
with open(os.path.join(directory, "test.txt"), "w") as dest:
    dest.write(file_content)

loader = DirectoryLoader(directory)
documents = loader.load()
print(documents)

# generator with openai models
generator_llm = ChatOpenAI(model="gpt-3.5-turbo-16k")
critic_llm = ChatOpenAI(model="gpt-4")
embeddings = OpenAIEmbeddings()
generator = TestsetGenerator.from_langchain(
    generator_llm,
    critic_llm,
    embeddings,
)

# generate testset
# NOTE: this is the call reported to hang (ragas 0.1.7, Python 3.12.2).
testset = generator.generate_with_langchain_docs(documents, test_size=2)
Error trace
When interrupted, here is the resulting trace
---------------------------------------------------------------------------
KeyboardInterrupt Traceback (most recent call last)
Cell In[8], line 32
25 generator = TestsetGenerator.from_langchain(
26 generator_llm,
27 critic_llm,
28 embeddings
29 )
31 # generate testset
---> 32 testset = generator.generate_with_langchain_docs(documents, test_size=2)
File ~/salesbot/salesbot-venv/lib/python3.12/site-packages/ragas/testset/generator.py:175, in TestsetGenerator.generate_with_langchain_docs(self, documents, test_size, distributions, with_debugging_logs, is_async, raise_exceptions, run_config)
173 distributions = distributions or {}
174 # chunk documents and add to docstore
--> 175 self.docstore.add_documents(
176 [Document.from_langchain_document(doc) for doc in documents]
177 )
179 return self.generate(
180 test_size=test_size,
181 distributions=distributions,
(...)
185 run_config=run_config,
186 )
File ~/salesbot/salesbot-venv/lib/python3.12/site-packages/ragas/testset/docstore.py:215, in InMemoryDocumentStore.add_documents(self, docs, show_progress)
210 # split documents with self.splitter into smaller nodes
211 nodes = [
212 Node.from_langchain_document(d)
213 for d in self.splitter.transform_documents(docs)
214 ]
--> 215 self.add_nodes(nodes, show_progress=show_progress)
File ~/salesbot/salesbot-venv/lib/python3.12/site-packages/ragas/testset/docstore.py:252, in InMemoryDocumentStore.add_nodes(self, nodes, show_progress)
245 executor.submit(
246 self.extractor.extract,
247 n,
248 name=f"keyphrase-extraction[{i}]",
249 )
250 result_idx += 1
--> 252 results = executor.results()
253 if not results:
254 raise ExceptionInRunner()
File ~/salesbot/salesbot-venv/lib/python3.12/site-packages/ragas/executor.py:132, in Executor.results(self)
130 executor_job.start()
131 try:
--> 132 executor_job.join()
133 finally:
134 ...
File /opt/homebrew/Cellar/python@3.12/3.12.2_1/Frameworks/Python.framework/Versions/3.12/lib/python3.12/threading.py:1147, in Thread.join(self, timeout)
1144 raise RuntimeError("cannot join current thread")
1146 if timeout is None:
-> 1147 self._wait_for_tstate_lock()
1148 else:
1149 # the behavior of a negative timeout isn't documented, but
1150 # historically .join(timeout=x) for x<0 has acted as if timeout=0
1151 self._wait_for_tstate_lock(timeout=max(timeout, 0))
File /opt/homebrew/Cellar/python@3.12/3.12.2_1/Frameworks/Python.framework/Versions/3.12/lib/python3.12/threading.py:1167, in Thread._wait_for_tstate_lock(self, block, timeout)
1164 return
1166 try:
-> 1167 if lock.acquire(block, timeout):
1168 lock.release()
1169 self._stop()
KeyboardInterrupt:
Expected behavior
Production of a synthetic test set; code should not hang
Additional context
N/A
The text was updated successfully, but these errors were encountered:
[x] I have checked the documentation and related resources and couldn't resolve my bug.
The "Get Started" section on synthetic data generation is broken: when loading a very basic single-file test case, the code hangs.
Ragas version: 0.1.7
Python version: 3.12.2
OS: MacOS Sonoma
Code to Reproduce
Error trace
When interrupted, here is the resulting trace
Expected behavior
Production of a synthetic test set; code should not hang
Additional context
N/A
The text was updated successfully, but these errors were encountered: