Skip to content

Commit 76eebe1

Browse files
committed
rename kg to knowledge_graph and refactor the persona import from ragas
1 parent afe726e commit 76eebe1

File tree

3 files changed

+78
-91
lines changed

3 files changed

+78
-91
lines changed

backend/llm_eval/qa_catalog/generator/implementation/ragas/generator.py

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -10,17 +10,15 @@
1010
from pydantic import ValidationError
1111
from ragas.embeddings import LangchainEmbeddingsWrapper
1212
from ragas.testset import TestsetGenerator
13+
from ragas.testset import persona as ragas_persona
1314
from ragas.testset.graph import KnowledgeGraph, Node, NodeType
14-
from ragas.testset.persona import Persona
1515
from ragas.testset.synthesizers import (
1616
BaseSynthesizer,
1717
MultiHopAbstractQuerySynthesizer,
1818
MultiHopSpecificQuerySynthesizer,
1919
SingleHopSpecificQuerySynthesizer,
2020
)
21-
from ragas.testset.synthesizers.generate import (
22-
LangchainLLMWrapper,
23-
)
21+
from ragas.testset.synthesizers.generate import LangchainLLMWrapper
2422
from ragas.testset.synthesizers.testset_schema import Testset
2523
from ragas.testset.transforms import (
2624
EmbeddingExtractor,
@@ -130,7 +128,7 @@ def __init__(
130128

131129
self.personas = (
132130
[
133-
Persona(name=p.name, role_description=p.description)
131+
ragas_persona.Persona(name=p.name, role_description=p.description)
134132
for p in self.config.personas
135133
]
136134
if self.config.personas
@@ -237,7 +235,7 @@ def _split(doc: Document, token_length: int) -> list[Document]:
237235

238236
def apply_knowledge_graph_transformations(
239237
self,
240-
kg: KnowledgeGraph,
238+
knowledge_graph: KnowledgeGraph,
241239
) -> None:
242240
headline_extractor = HeadlinesExtractor(llm=self.llm, max_num=20)
243241
headline_splitter = HeadlineSplitter(max_tokens=1500, min_tokens=100)
@@ -264,28 +262,29 @@ def apply_knowledge_graph_transformations(
264262
ner_extractor,
265263
]
266264

267-
apply_transforms(kg, transforms=transforms)
265+
apply_transforms(knowledge_graph, transforms=transforms)
268266

269267
def create_knowledge_graph(self) -> KnowledgeGraph:
270268
"""
271269
Loads the knowledge graph if it already exists
272270
and compares its nodes with the current documents.
273-
If the existent graph has these documents already it uses the existent kg
274-
otherwise create a new kg out ouf the documents
271+
If the existent graph has these documents
272+
already it uses the existent knowledge graph
273+
otherwise create a new knowledge graph out of the documents
275274
"""
276275

277276
docs = self._load_and_process_documents() # chunks of documents
278277
if len(docs) == 0:
279278
raise RuntimeError("No documents found")
280279

281280
chunks = self.split_documents(docs)
282-
kg = KnowledgeGraph(
281+
knowledge_graph = KnowledgeGraph(
283282
nodes=self._create_knowledge_graph_nodes(chunks),
284283
)
285284

286-
self.apply_knowledge_graph_transformations(kg)
285+
self.apply_knowledge_graph_transformations(knowledge_graph)
287286

288-
return kg
287+
return knowledge_graph
289288

290289
@deprecated("Until we have a better way to handle knowledge graph caching")
291290
def load_exiting_knowledge_graph(
@@ -367,12 +366,13 @@ async def a_create_synthetic_qa(
367366
self,
368367
collect_samples: Callable[[list[SyntheticQAPair]], Coroutine],
369368
) -> None:
370-
kg = self.create_knowledge_graph()
369+
knowledge_graph = self.create_knowledge_graph()
371370

372371
if not self.personas:
373-
from ragas.testset.persona import generate_personas_from_kg
374-
375-
self.personas = generate_personas_from_kg(kg, self.llm)
372+
self.personas = ragas_persona.generate_personas_from_kg(
373+
knowledge_graph,
374+
self.llm,
375+
)
376376
if not self.personas:
377377
raise ValueError("Failed to generate personas")
378378

@@ -381,11 +381,11 @@ async def a_create_synthetic_qa(
381381
generator = TestsetGenerator(
382382
llm=self.llm,
383383
embedding_model=self.embeddings,
384-
knowledge_graph=kg,
384+
knowledge_graph=knowledge_graph,
385385
persona_list=self.personas,
386386
)
387387

388-
query_distribution = self.create_query_distribution(kg)
388+
query_distribution = self.create_query_distribution(knowledge_graph)
389389

390390
testset = self.generate_testset(
391391
generator,

0 commit comments

Comments
 (0)