diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 88f6cf76941ef..5465ba890a743 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -3717,6 +3717,7 @@ The look and feel of Excel worksheets created from pandas can be modified using * ``float_format`` : Format string for floating point numbers (default ``None``). * ``freeze_panes`` : A tuple of two integers representing the bottommost row and rightmost column to freeze. Each of these parameters is one-based, so (1, 1) will freeze the first row and first column (default ``None``). +* ``autofilter`` : A boolean indicating whether to add automatic filters to all columns (default ``False``). .. note:: diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 0f7be8cfbcb68..2dad652d246fa 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -202,6 +202,7 @@ Other enhancements - :class:`Holiday` has gained the constructor argument and field ``exclude_dates`` to exclude specific datetimes from a custom holiday calendar (:issue:`54382`) - :class:`Rolling` and :class:`Expanding` now support ``nunique`` (:issue:`26958`) - :class:`Rolling` and :class:`Expanding` now support aggregations ``first`` and ``last`` (:issue:`33155`) +- :func:`DataFrame.to_excel` has a new ``autofilter`` parameter to add automatic filters to all columns (:issue:`61194`) - :func:`read_parquet` accepts ``to_pandas_kwargs`` which are forwarded to :meth:`pyarrow.Table.to_pandas` which enables passing additional keywords to customize the conversion to pandas, such as ``maps_as_pydicts`` to read the Parquet map data type as python dictionaries (:issue:`56842`) - :func:`to_numeric` on big integers converts to ``object`` datatype with python integers when not coercing. 
(:issue:`51295`) - :meth:`.DataFrameGroupBy.transform`, :meth:`.SeriesGroupBy.transform`, :meth:`.DataFrameGroupBy.agg`, :meth:`.SeriesGroupBy.agg`, :meth:`.SeriesGroupBy.apply`, :meth:`.DataFrameGroupBy.apply` now support ``kurt`` (:issue:`40139`) @@ -232,7 +233,6 @@ Other enhancements - Support reading Stata 102-format (Stata 1) dta files (:issue:`58978`) - Support reading Stata 110-format (Stata 7) dta files (:issue:`47176`) - Switched wheel upload to **PyPI Trusted Publishing** (OIDC) for release-tag pushes in ``wheels.yml``. (:issue:`61718`) -- .. --------------------------------------------------------------------------- .. _whatsnew_300.notable_bug_fixes: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 5bcf7d52490d7..d742375241ad8 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2180,6 +2180,7 @@ def to_excel( freeze_panes: tuple[int, int] | None = None, storage_options: StorageOptions | None = None, engine_kwargs: dict[str, Any] | None = None, + autofilter: bool = False, ) -> None: """ Write {klass} to an Excel sheet. @@ -2240,6 +2241,9 @@ def to_excel( .. versionadded:: {storage_options_versionadded} {extra_parameters} + autofilter : bool, default False + If True, add automatic filters to all columns. + See Also -------- to_csv : Write DataFrame to a comma-separated values (csv) file. 
@@ -2312,6 +2316,7 @@ def to_excel( index_label=index_label, merge_cells=merge_cells, inf_rep=inf_rep, + autofilter=autofilter, ) formatter.write( excel_writer, diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 0732ba87de30c..a171b1229f7bb 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -1204,6 +1204,7 @@ def _write_cells( startrow: int = 0, startcol: int = 0, freeze_panes: tuple[int, int] | None = None, + autofilter_range: str | None = None, ) -> None: """ Write given formatted cells into Excel an excel sheet @@ -1218,6 +1219,8 @@ def _write_cells( startcol : upper left cell column to dump data frame freeze_panes: int tuple of length 2 contains the bottom-most row and right-most column to freeze + autofilter_range: str, default None + column ranges to add automatic filters to, for example "A1:D5" """ raise NotImplementedError diff --git a/pandas/io/excel/_odswriter.py b/pandas/io/excel/_odswriter.py index e9a06076f3aff..f49b4422ce13b 100644 --- a/pandas/io/excel/_odswriter.py +++ b/pandas/io/excel/_odswriter.py @@ -99,10 +99,15 @@ def _write_cells( startrow: int = 0, startcol: int = 0, freeze_panes: tuple[int, int] | None = None, + autofilter_range: str | None = None, ) -> None: """ Write the frame cells using odf """ + + if autofilter_range: + raise ValueError("Autofilter is not supported with odf!") + from odf.table import ( Table, TableCell, diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py index 867d11583dcc0..ea13ffa23cdfb 100644 --- a/pandas/io/excel/_openpyxl.py +++ b/pandas/io/excel/_openpyxl.py @@ -449,6 +449,7 @@ def _write_cells( startrow: int = 0, startcol: int = 0, freeze_panes: tuple[int, int] | None = None, + autofilter_range: str | None = None, ) -> None: # Write the frame cells using openpyxl. 
sheet_name = self._get_sheet_name(sheet_name) @@ -532,6 +533,9 @@ def _write_cells( for k, v in style_kwargs.items(): setattr(xcell, k, v) + if autofilter_range: + wks.auto_filter.ref = autofilter_range + class OpenpyxlReader(BaseExcelReader["Workbook"]): @doc(storage_options=_shared_docs["storage_options"]) diff --git a/pandas/io/excel/_xlsxwriter.py b/pandas/io/excel/_xlsxwriter.py index 4a7b8eee2bfce..9ebec51067672 100644 --- a/pandas/io/excel/_xlsxwriter.py +++ b/pandas/io/excel/_xlsxwriter.py @@ -245,6 +245,7 @@ def _write_cells( startrow: int = 0, startcol: int = 0, freeze_panes: tuple[int, int] | None = None, + autofilter_range: str | None = None, ) -> None: # Write the frame cells using xlsxwriter. sheet_name = self._get_sheet_name(sheet_name) @@ -282,3 +283,6 @@ def _write_cells( ) else: wks.write(startrow + cell.row, startcol + cell.col, val, style) + + if autofilter_range: + wks.autofilter(autofilter_range) diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index d4d47253a5f82..753a731bd7790 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -532,6 +532,8 @@ class ExcelFormatter: Defaults to ``CSSToExcelConverter()``. It should have signature css_declarations string -> excel style. This is only called for body cells. 
+ autofilter : bool, default False + If True, add automatic filters to all columns """ max_rows = 2**20 @@ -549,6 +551,7 @@ def __init__( merge_cells: ExcelWriterMergeCells = False, inf_rep: str = "inf", style_converter: Callable | None = None, + autofilter: bool = False, ) -> None: self.rowcounter = 0 self.na_rep = na_rep @@ -584,6 +587,7 @@ def __init__( raise ValueError(f"Unexpected value for {merge_cells=}.") self.merge_cells = merge_cells self.inf_rep = inf_rep + self.autofilter = autofilter def _format_value(self, val): if is_scalar(val) and missing.isna(val): @@ -873,6 +877,34 @@ def get_formatted_cells(self) -> Iterable[ExcelCell]: cell.val = self._format_value(cell.val) yield cell + def _num2excel(self, index: int) -> str: + """ + Convert 0-based column index to Excel column name. + + Parameters + ---------- + index : int + The numeric column index to convert to a Excel column name. + + Returns + ------- + column_name : str + The column name corresponding to the index. + + Raises + ------ + ValueError + Index is negative + """ + if index < 0: + raise ValueError(f"Index cannot be negative: {index}") + column_name = "" + # while loop in case column name needs to be longer than 1 character + while index > 0 or not column_name: + index, remainder = divmod(index, 26) + column_name = chr(65 + remainder) + column_name + return column_name + @doc(storage_options=_shared_docs["storage_options"]) def write( self, @@ -916,6 +948,31 @@ def write( f"Max sheet size is: {self.max_rows}, {self.max_cols}" ) + if self.autofilter: + if num_cols == 0: + indexoffset = 0 + elif self.index: + if isinstance(self.df.index, MultiIndex): + indexoffset = self.df.index.nlevels - 1 + if self.merge_cells: + warnings.warn( + "Excel filters merged cells by showing only the first row." 
@pytest.mark.parametrize("engine", ["xlsxwriter", "openpyxl"])
@pytest.mark.parametrize("with_index", [True, False])
def test_autofilter(engine, with_index, tmp_excel):
    # GH 61194
    # Skip (rather than error) when the writer engine is not installed.
    pytest.importorskip(engine)
    df = DataFrame([{"A": 1, "B": 2, "C": 3}, {"A": 4, "B": 5, "C": 6}])

    with ExcelWriter(tmp_excel, engine=engine) as writer:
        df.to_excel(writer, autofilter=True, index=with_index)

    openpyxl = pytest.importorskip("openpyxl")  # test loading only with openpyxl
    with contextlib.closing(openpyxl.load_workbook(tmp_excel)) as wb:
        ws = wb.active

        # Compute the expectation first: ``assert a == b if c else d`` parses
        # as ``assert ((a == b) if c else d)``, which would make the
        # ``with_index=False`` case assert a non-empty string (always true).
        expected = "A1:D3" if with_index else "A1:C3"
        assert ws.auto_filter.ref is not None
        assert ws.auto_filter.ref == expected


@pytest.mark.parametrize("engine", ["xlsxwriter", "openpyxl"])
def test_autofilter_with_startrow_startcol(engine, tmp_excel):
    # GH 61194
    pytest.importorskip(engine)
    df = DataFrame([{"A": 1, "B": 2, "C": 3}, {"A": 4, "B": 5, "C": 6}])
    with ExcelWriter(tmp_excel, engine=engine) as writer:
        df.to_excel(writer, autofilter=True, startrow=10, startcol=10)

    openpyxl = pytest.importorskip("openpyxl")  # test loading only with openpyxl
    with contextlib.closing(openpyxl.load_workbook(tmp_excel)) as wb:
        ws = wb.active
        assert ws.auto_filter.ref is not None
        # Autofilter range moved by 10x10 cells
        assert ws.auto_filter.ref == "K11:N13"


def test_autofilter_not_supported_by_odf(tmp_path):
    # GH 61194
    pytest.importorskip("odf")
    # odf needs 'ods' extension
    tmp_excel_ods = tmp_path / f"{uuid.uuid4()}.ods"
    tmp_excel_ods.touch()

    with pytest.raises(ValueError, match="Autofilter is not supported with odf!"):
        with ExcelWriter(str(tmp_excel_ods), engine="odf") as writer:
            DataFrame().to_excel(writer, autofilter=True, index=False)


@pytest.mark.parametrize("engine", ["xlsxwriter", "openpyxl"])
def test_autofilter_with_multiindex(engine, tmp_excel):
    # GH 61194
    pytest.importorskip(engine)
    df = DataFrame(
        {
            "animal": ("horse", "horse", "dog", "dog"),
            "color of fur": ("black", "white", "grey", "black"),
            "name": ("Blacky", "Wendy", "Rufus", "Catchy"),
        }
    )
    # setup hierarchical index
    mi_df = df.set_index(["animal", "color of fur"])
    with ExcelWriter(tmp_excel, engine=engine) as writer:
        mi_df.to_excel(writer, autofilter=True, index=True, merge_cells=False)

    openpyxl = pytest.importorskip("openpyxl")  # test loading only with openpyxl
    with contextlib.closing(openpyxl.load_workbook(tmp_excel)) as wb:
        ws = wb.active

        # Two index levels plus one data column -> columns A..C; one header
        # row plus four data rows -> rows 1..5.
        assert ws.auto_filter.ref is not None
        assert ws.auto_filter.ref == "A1:C5"


def test_autofilter_with_multiindex_and_merge_cells_shows_warning(tmp_excel):
    # GH 61194
    pytest.importorskip("openpyxl")
    df = DataFrame(
        {
            "animal": ("horse", "horse", "dog", "dog"),
            "color of fur": ("black", "white", "grey", "black"),
            "name": ("Blacky", "Wendy", "Rufus", "Catchy"),
        }
    )
    # setup hierarchical index
    mi_df = df.set_index(["animal", "color of fur"])
    with ExcelWriter(tmp_excel, engine="openpyxl") as writer:
        # NOTE: ``match`` is a regex that must mirror the warning text exactly
        # as the writer emits it (including its current spelling), so the
        # pattern is intentionally left verbatim.
        with tm.assert_produces_warning(
            UserWarning,
            match="Excel filters merged cells by showing only the first row."
            "'autofiler' and 'merge_cells' should not be used simultaneously.",
        ):
            mi_df.to_excel(writer, autofilter=True, index=True, merge_cells=True)