Skip to content

Commit 7bf82b7

Browse files
authored
Add empty checks for NLP graphs (#2133)
1 parent d6e6191 commit 7bf82b7

File tree

5 files changed

+33
-10
lines changed

5 files changed

+33
-10
lines changed

packages/graphrag/graphrag/index/operations/build_noun_graph/build_noun_graph.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -98,9 +98,11 @@ def _extract_edges(
9898
Input: nodes_df with schema [id, title, frequency, text_unit_ids]
9999
Returns: edges_df with schema [source, target, weight, text_unit_ids]
100100
"""
101+
if nodes_df.empty:
102+
return pd.DataFrame(columns=["source", "target", "weight", "text_unit_ids"])
103+
101104
text_units_df = nodes_df.explode("text_unit_ids")
102105
text_units_df = text_units_df.rename(columns={"text_unit_ids": "text_unit_id"})
103-
104106
text_units_df = (
105107
text_units_df.groupby("text_unit_id")
106108
.agg({"title": lambda x: list(x) if len(x) > 1 else np.nan})

packages/graphrag/graphrag/index/operations/prune_graph.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -64,6 +64,9 @@ def prune_graph(
6464
])
6565

6666
# remove edges by min weight
67+
if len(graph.edges) == 0:
68+
return graph
69+
6770
if min_edge_weight_pct > 0:
6871
min_edge_weight = np.percentile(
6972
[data[schemas.EDGE_WEIGHT] for _, _, data in graph.edges(data=True)],

packages/graphrag/graphrag/index/workflows/extract_graph.py

Lines changed: 4 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -121,14 +121,14 @@ async def extract_graph(
121121
async_type=extraction_async_type,
122122
)
123123

124-
if not _validate_data(extracted_entities):
125-
error_msg = "Entity Extraction failed. No entities detected during extraction."
124+
if len(extracted_entities) == 0:
125+
error_msg = "Graph Extraction failed. No entities detected during extraction."
126126
logger.error(error_msg)
127127
raise ValueError(error_msg)
128128

129-
if not _validate_data(extracted_relationships):
129+
if len(extracted_relationships) == 0:
130130
error_msg = (
131-
"Entity Extraction failed. No relationships detected during extraction."
131+
"Graph Extraction failed. No relationships detected during extraction."
132132
)
133133
logger.error(error_msg)
134134
raise ValueError(error_msg)
@@ -180,8 +180,3 @@ async def get_summarized_entities_relationships(
180180
extracted_entities.drop(columns=["description"], inplace=True)
181181
entities = extracted_entities.merge(entity_summaries, on="title", how="left")
182182
return entities, relationships
183-
184-
185-
def _validate_data(df: pd.DataFrame) -> bool:
186-
"""Validate that the dataframe has data."""
187-
return len(df) > 0

packages/graphrag/graphrag/index/workflows/extract_graph_nlp.py

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -75,6 +75,19 @@ async def extract_graph_nlp(
7575
cache=cache,
7676
)
7777

78+
if len(extracted_nodes) == 0:
79+
error_msg = (
80+
"NLP Graph Extraction failed. No entities detected during extraction."
81+
)
82+
logger.error(error_msg)
83+
raise ValueError(error_msg)
84+
85+
if len(extracted_edges) == 0:
86+
error_msg = (
87+
"NLP Graph Extraction failed. No relationships detected during extraction."
88+
)
89+
logger.error(error_msg)
90+
7891
# add in any other columns required by downstream workflows
7992
extracted_nodes["type"] = "NOUN PHRASE"
8093
extracted_nodes["description"] = ""

packages/graphrag/graphrag/index/workflows/prune_graph.py

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -69,6 +69,16 @@ def prune_graph(
6969
lcc_only=pruning_config.lcc_only,
7070
)
7171

72+
if len(pruned.nodes) == 0:
73+
error_msg = "Graph Pruning failed. No entities remain."
74+
logger.error(error_msg)
75+
raise ValueError(error_msg)
76+
77+
if len(pruned.edges) == 0:
78+
error_msg = "Graph Pruning failed. No relationships remain."
79+
logger.error(error_msg)
80+
raise ValueError(error_msg)
81+
7282
pruned_nodes, pruned_edges = graph_to_dataframes(
7383
pruned, node_columns=["title"], edge_columns=["source", "target"]
7484
)

0 commit comments

Comments
 (0)