1010from graphrag .callbacks .workflow_callbacks import WorkflowCallbacks
1111from graphrag .config .embeddings import (
1212 community_full_content_embedding ,
13- community_summary_embedding ,
14- community_title_embedding ,
1513 create_index_name ,
16- document_text_embedding ,
1714 entity_description_embedding ,
18- entity_title_embedding ,
19- relationship_description_embedding ,
2015 text_unit_text_embedding ,
2116)
2217from graphrag .config .models .graph_rag_config import GraphRagConfig
@@ -47,29 +42,14 @@ async def run_workflow(
4742 logger .info ("Workflow started: generate_text_embeddings" )
4843 embedded_fields = config .embed_text .names
4944 logger .info ("Embedding the following fields: %s" , embedded_fields )
50- documents = None
51- relationships = None
5245 text_units = None
5346 entities = None
5447 community_reports = None
55- if document_text_embedding in embedded_fields :
56- documents = await load_table_from_storage ("documents" , context .output_storage )
57- if relationship_description_embedding in embedded_fields :
58- relationships = await load_table_from_storage (
59- "relationships" , context .output_storage
60- )
6148 if text_unit_text_embedding in embedded_fields :
6249 text_units = await load_table_from_storage ("text_units" , context .output_storage )
63- if (
64- entity_title_embedding in embedded_fields
65- or entity_description_embedding in embedded_fields
66- ):
50+ if entity_description_embedding in embedded_fields :
6751 entities = await load_table_from_storage ("entities" , context .output_storage )
68- if (
69- community_title_embedding in embedded_fields
70- or community_summary_embedding in embedded_fields
71- or community_full_content_embedding in embedded_fields
72- ):
52+ if community_full_content_embedding in embedded_fields :
7353 community_reports = await load_table_from_storage (
7454 "community_reports" , context .output_storage
7555 )
@@ -87,8 +67,6 @@ async def run_workflow(
8767 tokenizer = get_tokenizer (model_config )
8868
8969 output = await generate_text_embeddings (
90- documents = documents ,
91- relationships = relationships ,
9270 text_units = text_units ,
9371 entities = entities ,
9472 community_reports = community_reports ,
@@ -115,8 +93,6 @@ async def run_workflow(
11593
11694
11795async def generate_text_embeddings (
118- documents : pd .DataFrame | None ,
119- relationships : pd .DataFrame | None ,
12096 text_units : pd .DataFrame | None ,
12197 entities : pd .DataFrame | None ,
12298 community_reports : pd .DataFrame | None ,
@@ -131,26 +107,12 @@ async def generate_text_embeddings(
131107) -> dict [str , pd .DataFrame ]:
132108 """All the steps to generate all embeddings."""
133109 embedding_param_map = {
134- document_text_embedding : {
135- "data" : documents .loc [:, ["id" , "text" ]] if documents is not None else None ,
136- "embed_column" : "text" ,
137- },
138- relationship_description_embedding : {
139- "data" : relationships .loc [:, ["id" , "description" ]]
140- if relationships is not None
141- else None ,
142- "embed_column" : "description" ,
143- },
144110 text_unit_text_embedding : {
145111 "data" : text_units .loc [:, ["id" , "text" ]]
146112 if text_units is not None
147113 else None ,
148114 "embed_column" : "text" ,
149115 },
150- entity_title_embedding : {
151- "data" : entities .loc [:, ["id" , "title" ]] if entities is not None else None ,
152- "embed_column" : "title" ,
153- },
154116 entity_description_embedding : {
155117 "data" : entities .loc [:, ["id" , "title" , "description" ]].assign (
156118 title_description = lambda df : df ["title" ] + ":" + df ["description" ]
@@ -159,18 +121,6 @@ async def generate_text_embeddings(
159121 else None ,
160122 "embed_column" : "title_description" ,
161123 },
162- community_title_embedding : {
163- "data" : community_reports .loc [:, ["id" , "title" ]]
164- if community_reports is not None
165- else None ,
166- "embed_column" : "title" ,
167- },
168- community_summary_embedding : {
169- "data" : community_reports .loc [:, ["id" , "summary" ]]
170- if community_reports is not None
171- else None ,
172- "embed_column" : "summary" ,
173- },
174124 community_full_content_embedding : {
175125 "data" : community_reports .loc [:, ["id" , "full_content" ]]
176126 if community_reports is not None
0 commit comments