
Commit 77fb7d9

Logging improvements (#2030)

* Turn down blob/cosmos exception reporting to match file storage
* Restore indexing-engine.log
* Restore some basic console logging and progress for index CLI
* Semver
* Ignore small ruff complaints
* Fix CLI console printing
1 parent 469ee85 commit 77fb7d9

File tree

12 files changed: +136, -270 lines

Lines changed: 4 additions & 0 deletions

@@ -0,0 +1,4 @@
+{
+    "type": "patch",
+    "description": "Improve upon recent logging refactor"
+}

graphrag/__init__.py

Lines changed: 0 additions & 7 deletions
@@ -2,10 +2,3 @@
 # Licensed under the MIT License
 
 """The GraphRAG package."""
-
-import logging
-
-from graphrag.logger.standard_logging import init_console_logger
-
-logger = logging.getLogger(__name__)
-init_console_logger()
graphrag/callbacks/console_workflow_callbacks.py

Lines changed: 46 additions & 0 deletions
@@ -0,0 +1,46 @@
+# Copyright (c) 2024 Microsoft Corporation.
+# Licensed under the MIT License
+
+"""A logger that emits updates from the indexing engine to the console."""
+
+from graphrag.callbacks.noop_workflow_callbacks import NoopWorkflowCallbacks
+from graphrag.index.typing.pipeline_run_result import PipelineRunResult
+from graphrag.logger.progress import Progress
+
+# ruff: noqa: T201
+
+
+class ConsoleWorkflowCallbacks(NoopWorkflowCallbacks):
+    """A logger that writes to a console."""
+
+    _verbose = False
+
+    def __init__(self, verbose=False):
+        self._verbose = verbose
+
+    def pipeline_start(self, names: list[str]) -> None:
+        """Execute this callback to signal when the entire pipeline starts."""
+        print("Starting pipeline with workflows:", ", ".join(names))
+
+    def pipeline_end(self, results: list[PipelineRunResult]) -> None:
+        """Execute this callback to signal when the entire pipeline ends."""
+        print("Pipeline complete")
+
+    def workflow_start(self, name: str, instance: object) -> None:
+        """Execute this callback when a workflow starts."""
+        print(f"Starting workflow: {name}")
+
+    def workflow_end(self, name: str, instance: object) -> None:
+        """Execute this callback when a workflow ends."""
+        print("")  # account for potential return on prior progress
+        print(f"Workflow complete: {name}")
+        if self._verbose:
+            print(instance)
+
+    def progress(self, progress: Progress) -> None:
+        """Handle when progress occurs."""
+        complete = progress.completed_items or 0
+        total = progress.total_items or 1
+        percent = round((complete / total) * 100)
+        start = f" {complete} / {total} "
+        print(f"{start:{'.'}<{percent}}", flush=True, end="\r")

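The progress renderer above leans on Python's format-spec fill syntax: the " completed / total " label is left-aligned and padded with dots out to `percent` columns, and the trailing carriage return lets each call overwrite the previous line. A minimal standalone sketch of that behavior (render_progress is an illustrative stand-in, not part of this commit):

import time

def render_progress(completed: int, total: int) -> None:
    # Pad the label with dots out to `percent` columns, then return the
    # cursor to the start of the line so the next call overwrites it.
    percent = round((completed / max(total, 1)) * 100)
    start = f" {completed} / {total} "
    print(f"{start:{'.'}<{percent}}", flush=True, end="\r")

for i in range(101):
    render_progress(i, 100)
    time.sleep(0.01)
print()  # move past the carriage-returned line once finished
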
graphrag/cli/index.py

Lines changed: 5 additions & 3 deletions
@@ -10,9 +10,11 @@
 from pathlib import Path
 
 import graphrag.api as api
+from graphrag.callbacks.console_workflow_callbacks import ConsoleWorkflowCallbacks
 from graphrag.config.enums import CacheType, IndexingMethod, ReportingType
 from graphrag.config.load_config import load_config
 from graphrag.index.validate_config import validate_config_names
+from graphrag.logger.standard_logging import DEFAULT_LOG_FILENAME
 from graphrag.utils.cli import redact
 
 # Ignore warnings from numba
@@ -115,7 +117,6 @@ def _run_index(
     # Initialize loggers and reporting config
     init_loggers(
         config=config,
-        root_dir=str(config.root_dir) if config.root_dir else None,
         verbose=verbose,
     )
 
@@ -124,8 +125,8 @@
 
     # Log the configuration details
     if config.reporting.type == ReportingType.file:
-        log_dir = Path(config.root_dir or "") / (config.reporting.base_dir or "")
-        log_path = log_dir / "logs.txt"
+        log_dir = Path(config.root_dir) / config.reporting.base_dir
+        log_path = log_dir / DEFAULT_LOG_FILENAME
         logger.info("Logging enabled at %s", log_path)
     else:
         logger.info(
@@ -154,6 +155,7 @@
             method=method,
             is_update_run=is_update_run,
             memory_profile=memprofile,
+            callbacks=[ConsoleWorkflowCallbacks(verbose=verbose)],
         )
     )
     encountered_errors = any(

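Library consumers can presumably attach the same console reporting when driving the indexing API directly, mirroring the CLI change above. A hedged sketch, assuming api.build_index accepts a callbacks list as the diff suggests, and that load_config resolves a settings file under the given root:

import asyncio
from pathlib import Path

import graphrag.api as api
from graphrag.callbacks.console_workflow_callbacks import ConsoleWorkflowCallbacks
from graphrag.config.load_config import load_config

# "./ragtest" is a hypothetical project root containing a settings file.
config = load_config(Path("./ragtest"))
results = asyncio.run(
    api.build_index(
        config=config,
        callbacks=[ConsoleWorkflowCallbacks(verbose=False)],  # assumed kwarg, per the diff above
    )
)
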
graphrag/cli/prompt_tune.py

Lines changed: 3 additions & 3 deletions
@@ -9,6 +9,7 @@
 import graphrag.api as api
 from graphrag.config.enums import ReportingType
 from graphrag.config.load_config import load_config
+from graphrag.logger.standard_logging import DEFAULT_LOG_FILENAME
 from graphrag.prompt_tune.generator.community_report_summarization import (
     COMMUNITY_SUMMARIZATION_FILENAME,
 )
@@ -75,14 +76,13 @@ async def prompt_tune(
     # initialize loggers with config
     init_loggers(
         config=graph_config,
-        root_dir=str(root_path),
         verbose=verbose,
     )
 
     # log the configuration details
     if graph_config.reporting.type == ReportingType.file:
-        log_dir = Path(root_path) / (graph_config.reporting.base_dir or "")
-        log_path = log_dir / "logs.txt"
+        log_dir = Path(root_path) / graph_config.reporting.base_dir
+        log_path = log_dir / DEFAULT_LOG_FILENAME
         logger.info("Logging enabled at %s", log_path)
     else:
         logger.info(

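Both CLIs now derive the log path as root / reporting.base_dir / DEFAULT_LOG_FILENAME instead of hardcoding "logs.txt". A minimal sketch of that derivation; the constant's value here is an assumption based on the commit message's "Restore indexing-engine.log":

from pathlib import Path

DEFAULT_LOG_FILENAME = "indexing-engine.log"  # assumed value, per the commit message

root_dir = Path("./ragtest")  # hypothetical project root
reporting_base_dir = "logs"

log_path = root_dir / reporting_base_dir / DEFAULT_LOG_FILENAME
print(log_path)  # ragtest/logs/indexing-engine.log
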
graphrag/cli/query.py

Lines changed: 21 additions & 67 deletions
@@ -4,7 +4,6 @@
 """CLI implementation of the query subcommand."""
 
 import asyncio
-import logging
 import sys
 from pathlib import Path
 from typing import TYPE_CHECKING, Any
@@ -19,8 +18,7 @@
 if TYPE_CHECKING:
     import pandas as pd
 
-# Initialize standard logger
-logger = logging.getLogger(__name__)
+# ruff: noqa: T201
 
 
 def run_global_search(
@@ -61,10 +59,6 @@ def run_global_search(
         final_community_reports_list = dataframe_dict["community_reports"]
         index_names = dataframe_dict["index_names"]
 
-        logger.info(
-            "Running multi-index global search on indexes: %s",
-            dataframe_dict["index_names"],
-        )
         response, context_data = asyncio.run(
             api.multi_index_global_search(
                 config=config,
@@ -80,11 +74,7 @@
                 verbose=verbose,
             )
         )
-        # log the full response at INFO level for user visibility but at DEBUG level in the API layer
-        logger.info("Query Response:\n%s", response)
-
-        # NOTE: we return the response and context data here purely as a complete demonstration of the API.
-        # External users should use the API directly to get the response and context data.
+        print(response)
         return response, context_data
 
     # Otherwise, call the Single-Index Global Search API
@@ -118,9 +108,9 @@ def on_context(context: Any) -> None:
                 verbose=verbose,
             ):
                 full_response += stream_chunk
-                print(stream_chunk, end="")  # noqa: T201
-                sys.stdout.flush()  # flush output buffer to display text immediately
-            print()  # noqa: T201
+                print(stream_chunk, end="")
+                sys.stdout.flush()
+            print()
             return full_response, context_data
 
         return asyncio.run(run_streaming_search())
@@ -138,11 +128,8 @@ def on_context(context: Any) -> None:
             verbose=verbose,
         )
     )
-    # log the full response at INFO level for user visibility but at DEBUG level in the API layer
-    logger.info("Global Search Response:\n%s", response)
+    print(response)
 
-    # NOTE: we return the response and context data here purely as a complete demonstration of the API.
-    # External users should use the API directly to get the response and context data.
     return response, context_data
 
 
@@ -188,11 +175,6 @@ def run_local_search(
         final_relationships_list = dataframe_dict["relationships"]
         index_names = dataframe_dict["index_names"]
 
-        logger.info(
-            "Running multi-index local search on indexes: %s",
-            dataframe_dict["index_names"],
-        )
-
         # If any covariates tables are missing from any index, set the covariates list to None
         if len(dataframe_dict["covariates"]) != dataframe_dict["num_indexes"]:
             final_covariates_list = None
@@ -216,11 +198,8 @@
                 verbose=verbose,
             )
         )
-        # log the full response at INFO level for user visibility but at DEBUG level in the API layer
-        logger.info("Local Search Response:\n%s", response)
+        print(response)
 
-        # NOTE: we return the response and context data here purely as a complete demonstration of the API.
-        # External users should use the API directly to get the response and context data.
         return response, context_data
 
     # Otherwise, call the Single-Index Local Search API
@@ -259,9 +238,9 @@ def on_context(context: Any) -> None:
                 verbose=verbose,
             ):
                 full_response += stream_chunk
-                print(stream_chunk, end="")  # noqa: T201
-                sys.stdout.flush()  # flush output buffer to display text immediately
-            print()  # noqa: T201
+                print(stream_chunk, end="")
+                sys.stdout.flush()
+            print()
             return full_response, context_data
 
         return asyncio.run(run_streaming_search())
@@ -281,11 +260,8 @@ def on_context(context: Any) -> None:
            verbose=verbose,
        )
    )
-    # log the full response at INFO level for user visibility but at DEBUG level in the API layer
-    logger.info("Local Search Response:\n%s", response)
+    print(response)
 
-    # NOTE: we return the response and context data here purely as a complete demonstration of the API.
-    # External users should use the API directly to get the response and context data.
     return response, context_data
 
 
@@ -329,11 +305,6 @@
         final_relationships_list = dataframe_dict["relationships"]
         index_names = dataframe_dict["index_names"]
 
-        logger.info(
-            "Running multi-index drift search on indexes: %s",
-            dataframe_dict["index_names"],
-        )
-
         response, context_data = asyncio.run(
             api.multi_index_drift_search(
                 config=config,
@@ -350,11 +321,8 @@
                 verbose=verbose,
             )
         )
-        # log the full response at INFO level for user visibility but at DEBUG level in the API layer
-        logger.info("DRIFT Search Response:\n%s", response)
+        print(response)
 
-        # NOTE: we return the response and context data here purely as a complete demonstration of the API.
-        # External users should use the API directly to get the response and context data.
         return response, context_data
 
     # Otherwise, call the Single-Index Drift Search API
@@ -391,9 +359,9 @@ def on_context(context: Any) -> None:
                 verbose=verbose,
             ):
                 full_response += stream_chunk
-                print(stream_chunk, end="")  # noqa: T201
-                sys.stdout.flush()  # flush output buffer to display text immediately
-            print()  # noqa: T201
+                print(stream_chunk, end="")
+                sys.stdout.flush()
+            print()
             return full_response, context_data
 
         return asyncio.run(run_streaming_search())
@@ -413,11 +381,8 @@ def on_context(context: Any) -> None:
            verbose=verbose,
        )
    )
-    # log the full response at INFO level for user visibility but at DEBUG level in the API layer
-    logger.info("DRIFT Search Response:\n%s", response)
+    print(response)
 
-    # NOTE: we return the response and context data here purely as a complete demonstration of the API.
-    # External users should use the API directly to get the response and context data.
     return response, context_data
 
 
@@ -451,11 +416,6 @@
         final_text_units_list = dataframe_dict["text_units"]
         index_names = dataframe_dict["index_names"]
 
-        logger.info(
-            "Running multi-index basic search on indexes: %s",
-            dataframe_dict["index_names"],
-        )
-
         response, context_data = asyncio.run(
             api.multi_index_basic_search(
                 config=config,
@@ -466,11 +426,8 @@
                 verbose=verbose,
             )
         )
-        # log the full response at INFO level for user visibility but at DEBUG level in the API layer
-        logger.info("Basic Search Response:\n%s", response)
+        print(response)
 
-        # NOTE: we return the response and context data here purely as a complete demonstration of the API.
-        # External users should use the API directly to get the response and context data.
         return response, context_data
 
     # Otherwise, call the Single-Index Basic Search API
@@ -497,9 +454,9 @@ def on_context(context: Any) -> None:
                 verbose=verbose,
            ):
                 full_response += stream_chunk
-                print(stream_chunk, end="")  # noqa: T201
-                sys.stdout.flush()  # flush output buffer to display text immediately
-            print()  # noqa: T201
+                print(stream_chunk, end="")
+                sys.stdout.flush()
+            print()
             return full_response, context_data
 
         return asyncio.run(run_streaming_search())
@@ -512,11 +469,8 @@ def on_context(context: Any) -> None:
            verbose=verbose,
        )
    )
-    # log the full response at INFO level for user visibility but at DEBUG level in the API layer
-    logger.info("Basic Search Response:\n%s", response)
+    print(response)
 
-    # NOTE: we return the response and context data here purely as a complete demonstration of the API.
-    # External users should use the API directly to get the response and context data.
    return response, context_data

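The streaming branches keep one echo pattern throughout: print each chunk without a newline, flush so tokens appear immediately, and emit a single trailing newline after the stream ends. A self-contained sketch of that pattern; stream_search is an illustrative stand-in, not the graphrag API:

import asyncio
import sys
from collections.abc import AsyncIterator

async def stream_search(query: str) -> AsyncIterator[str]:
    # Stand-in for an API streaming call; yields a canned response word by word.
    for word in f"(placeholder response to: {query})".split():
        yield word + " "
        await asyncio.sleep(0.05)

async def main() -> None:
    full_response = ""
    async for chunk in stream_search("what is graphrag?"):
        full_response += chunk
        print(chunk, end="")
        sys.stdout.flush()  # flush output buffer to display text immediately
    print()  # finish the line once the stream completes

asyncio.run(main())
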
graphrag/index/workflows/create_base_text_units.py

Lines changed: 0 additions & 3 deletions
@@ -17,7 +17,6 @@
 from graphrag.index.typing.context import PipelineRunContext
 from graphrag.index.typing.workflow import WorkflowFunctionOutput
 from graphrag.index.utils.hashing import gen_sha512_hash
-from graphrag.logger.progress import Progress
 from graphrag.utils.storage import load_table_from_storage, write_table_to_storage
 
 logger = logging.getLogger(__name__)
@@ -69,8 +68,6 @@ def create_base_text_units(
         zip(*[sort[col] for col in ["id", "text"]], strict=True)
     )
 
-    callbacks.progress(Progress(percent=0))
-
     agg_dict = {"text_with_ids": list}
     if "metadata" in documents:
         agg_dict["metadata"] = "first"  # type: ignore

graphrag/logger/progress.py

Lines changed: 0 additions & 3 deletions
@@ -16,9 +16,6 @@
 class Progress:
     """A class representing the progress of a task."""
 
-    percent: float | None = None
-    """0 - 1 progress"""
-
     description: str | None = None
     """Description of the progress"""
 

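With the unused percent field removed, consumers derive the ratio from the item counts, exactly as ConsoleWorkflowCallbacks.progress does above. A hedged sketch of the trimmed shape; fields beyond those visible in this commit (description, completed_items, total_items) may differ in the real class:

from dataclasses import dataclass

@dataclass
class Progress:
    """A trimmed stand-in for graphrag.logger.progress.Progress."""

    description: str | None = None
    completed_items: int | None = None
    total_items: int | None = None

def as_percent(p: Progress) -> int:
    # Same derivation ConsoleWorkflowCallbacks.progress uses.
    complete = p.completed_items or 0
    total = p.total_items or 1
    return round((complete / total) * 100)

print(as_percent(Progress(completed_items=42, total_items=120)))  # -> 35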