Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/source/user-guide/dataframe/rendering.rst
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ You can customize how DataFrames are rendered by configuring the formatter:
max_height=300, # Maximum height in pixels
max_memory_bytes=2097152, # Maximum memory for rendering (2MB)
min_rows_display=20, # Minimum number of rows to display
repr_rows=10, # Number of rows to display in __repr__
Comment on lines 60 to -61
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like the default here has min_rows > max_rows. Also should we have consistent naming of the two? Either min_rows and max_rows or min_rows_display and max_rows_display?

I think the _display was differentiating what happens during a display() call vs __repr__ but I think these values get used during both calls.

max_rows=10, # Maximum rows to display in __repr__
enable_cell_expansion=True,# Allow expanding truncated cells
custom_css=None, # Additional custom CSS
show_truncation_message=True, # Show message when data is truncated
Expand Down Expand Up @@ -191,7 +191,7 @@ You can control how much data is displayed and how much memory is used for rende
configure_formatter(
max_memory_bytes=4 * 1024 * 1024, # 4MB maximum memory for display
min_rows_display=50, # Always show at least 50 rows
repr_rows=20 # Show 20 rows in __repr__ output
Comment on lines 193 to -194
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as above, difference between _display and without the trailer and also we have here min_rows > max_rows

max_rows=20 # Show 20 rows in __repr__ output
)

These parameters help balance comprehensive data display against performance considerations.
Expand Down
200 changes: 168 additions & 32 deletions python/datafusion/dataframe_formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

from __future__ import annotations

import warnings
from typing import (
TYPE_CHECKING,
Any,
Expand Down Expand Up @@ -61,6 +62,93 @@ def _validate_bool(value: Any, param_name: str) -> None:
raise TypeError(msg)


def _validate_formatter_parameters(
max_cell_length: int,
max_width: int,
max_height: int,
max_memory_bytes: int,
min_rows_display: int,
max_rows: int | None,
repr_rows: int | None,
enable_cell_expansion: bool,
show_truncation_message: bool,
use_shared_styles: bool,
custom_css: str | None,
style_provider: Any,
) -> int:
"""Validate all formatter parameters and return resolved max_rows value.

Args:
max_cell_length: Maximum cell length value to validate
max_width: Maximum width value to validate
max_height: Maximum height value to validate
max_memory_bytes: Maximum memory bytes value to validate
min_rows_display: Minimum rows to display value to validate
max_rows: Maximum rows value to validate (None means use default)
repr_rows: Deprecated repr_rows value to validate
enable_cell_expansion: Boolean expansion flag to validate
show_truncation_message: Boolean message flag to validate
use_shared_styles: Boolean styles flag to validate
custom_css: Custom CSS string to validate
style_provider: Style provider object to validate

Returns:
The resolved max_rows value after handling repr_rows deprecation

Raises:
ValueError: If any numeric parameter is invalid or constraints are violated
TypeError: If any parameter has invalid type
DeprecationWarning: If repr_rows parameter is used
"""
# Validate numeric parameters
_validate_positive_int(max_cell_length, "max_cell_length")
_validate_positive_int(max_width, "max_width")
_validate_positive_int(max_height, "max_height")
_validate_positive_int(max_memory_bytes, "max_memory_bytes")
_validate_positive_int(min_rows_display, "min_rows_display")

# Handle deprecated repr_rows parameter
if repr_rows is not None:
warnings.warn(
"repr_rows parameter is deprecated, use max_rows instead",
DeprecationWarning,
stacklevel=4,
)
_validate_positive_int(repr_rows, "repr_rows")
if max_rows is not None and repr_rows != max_rows:
msg = "Cannot specify both repr_rows and max_rows; use max_rows only"
raise ValueError(msg)
max_rows = repr_rows

# Use default if max_rows was not provided
if max_rows is None:
max_rows = 10

_validate_positive_int(max_rows, "max_rows")

# Validate constraint: min_rows_display <= max_rows
if min_rows_display > max_rows:
msg = "min_rows_display must be less than or equal to max_rows"
raise ValueError(msg)

# Validate boolean parameters
_validate_bool(enable_cell_expansion, "enable_cell_expansion")
_validate_bool(show_truncation_message, "show_truncation_message")
_validate_bool(use_shared_styles, "use_shared_styles")

# Validate custom_css
if custom_css is not None and not isinstance(custom_css, str):
msg = "custom_css must be None or a string"
raise TypeError(msg)

# Validate style_provider
if style_provider is not None and not isinstance(style_provider, StyleProvider):
msg = "style_provider must implement the StyleProvider protocol"
raise TypeError(msg)

return max_rows


@runtime_checkable
class CellFormatter(Protocol):
"""Protocol for cell value formatters."""
Expand Down Expand Up @@ -126,8 +214,9 @@ class DataFrameHtmlFormatter:
max_width: Maximum width of the HTML table in pixels
max_height: Maximum height of the HTML table in pixels
max_memory_bytes: Maximum memory in bytes for rendered data (default: 2MB)
min_rows_display: Minimum number of rows to display
repr_rows: Default number of rows to display in repr output
min_rows_display: Minimum number of rows to display (must be <= max_rows)
max_rows: Maximum number of rows to display in repr output
repr_rows: Deprecated alias for max_rows
enable_cell_expansion: Whether to add expand/collapse buttons for long cell
values
custom_css: Additional CSS to include in the HTML output
Expand All @@ -143,8 +232,9 @@ def __init__(
max_width: int = 1000,
max_height: int = 300,
max_memory_bytes: int = 2 * 1024 * 1024, # 2 MB
min_rows_display: int = 20,
repr_rows: int = 10,
min_rows_display: int = 10,
max_rows: int | None = None,
repr_rows: int | None = None,
enable_cell_expansion: bool = True,
custom_css: str | None = None,
show_truncation_message: bool = True,
Expand All @@ -163,10 +253,13 @@ def __init__(
Maximum height of the displayed table in pixels.
max_memory_bytes : int, default 2097152 (2MB)
Maximum memory in bytes for rendered data.
min_rows_display : int, default 20
Minimum number of rows to display.
repr_rows : int, default 10
Default number of rows to display in repr output.
min_rows_display : int, default 10
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not about this PR per se, but maybe this is an opportunity to tighten up the comments here. We're repeating ourselves with the types and defaults. Those are already in the type hints. I think it's becoming customary to not duplicate that information and the argument line is the preferred place to keep it. That way we don't have to worry about maintaining the values in two places.

Minimum number of rows to display. Must be less than or equal to
``max_rows``.
max_rows : int, default 10
Maximum number of rows to display in repr output.
repr_rows : int, optional
Deprecated alias for ``max_rows``. Use ``max_rows`` instead.
enable_cell_expansion : bool, default True
Whether to allow cells to expand when clicked.
custom_css : str, optional
Expand All @@ -183,43 +276,36 @@ def __init__(
------
ValueError
If max_cell_length, max_width, max_height, max_memory_bytes,
min_rows_display, or repr_rows is not a positive integer.
min_rows_display or max_rows is not a positive integer.
TypeError
If enable_cell_expansion, show_truncation_message, or use_shared_styles is
not a boolean,
or if custom_css is provided but is not a string,
or if style_provider is provided but does not implement the StyleProvider
protocol.
"""
# Validate numeric parameters
_validate_positive_int(max_cell_length, "max_cell_length")
_validate_positive_int(max_width, "max_width")
_validate_positive_int(max_height, "max_height")
_validate_positive_int(max_memory_bytes, "max_memory_bytes")
_validate_positive_int(min_rows_display, "min_rows_display")
_validate_positive_int(repr_rows, "repr_rows")

# Validate boolean parameters
_validate_bool(enable_cell_expansion, "enable_cell_expansion")
_validate_bool(show_truncation_message, "show_truncation_message")
_validate_bool(use_shared_styles, "use_shared_styles")

# Validate custom_css
if custom_css is not None and not isinstance(custom_css, str):
msg = "custom_css must be None or a string"
raise TypeError(msg)

# Validate style_provider
if style_provider is not None and not isinstance(style_provider, StyleProvider):
msg = "style_provider must implement the StyleProvider protocol"
raise TypeError(msg)
# Validate all parameters and get resolved max_rows
resolved_max_rows = _validate_formatter_parameters(
max_cell_length,
max_width,
max_height,
max_memory_bytes,
min_rows_display,
max_rows,
repr_rows,
enable_cell_expansion,
show_truncation_message,
use_shared_styles,
custom_css,
style_provider,
)

self.max_cell_length = max_cell_length
self.max_width = max_width
self.max_height = max_height
self.max_memory_bytes = max_memory_bytes
self.min_rows_display = min_rows_display
self.repr_rows = repr_rows
self._max_rows = resolved_max_rows
self.enable_cell_expansion = enable_cell_expansion
self.custom_css = custom_css
self.show_truncation_message = show_truncation_message
Expand All @@ -231,6 +317,55 @@ def __init__(
self._custom_cell_builder: Callable[[Any, int, int, str], str] | None = None
self._custom_header_builder: Callable[[Any], str] | None = None

@property
def max_rows(self) -> int:
"""Get the maximum number of rows to display.

Returns:
The maximum number of rows to display in repr output
"""
return self._max_rows

@max_rows.setter
def max_rows(self, value: int) -> None:
"""Set the maximum number of rows to display.

Args:
value: The maximum number of rows
"""
self._max_rows = value

@property
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If repr_rows is being deprecated, why add an accessor?

def repr_rows(self) -> int:
"""Get the maximum number of rows (deprecated name).

.. deprecated::
Use :attr:`max_rows` instead. This property is provided for
backward compatibility.

Returns:
The maximum number of rows to display
"""
return self._max_rows

@repr_rows.setter
def repr_rows(self, value: int) -> None:
"""Set the maximum number of rows using deprecated name.

Comment on lines +351 to +354
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same, why add for deprecated?

.. deprecated::
Use :attr:`max_rows` setter instead. This property is provided for
backward compatibility.

Args:
value: The maximum number of rows
"""
warnings.warn(
"repr_rows is deprecated, use max_rows instead",
DeprecationWarning,
stacklevel=2,
)
self._max_rows = value

def register_formatter(self, type_class: type, formatter: CellFormatter) -> None:
"""Register a custom formatter for a specific data type.

Expand Down Expand Up @@ -660,6 +795,7 @@ def configure_formatter(**kwargs: Any) -> None:
"max_height",
"max_memory_bytes",
"min_rows_display",
"max_rows",
"repr_rows",
"enable_cell_expansion",
"custom_css",
Expand Down
Loading