-
Notifications
You must be signed in to change notification settings - Fork 144
Enforce DataFrame display memory limits with max_rows + min_rows constraint (deprecate repr_rows)
#1367
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Enforce DataFrame display memory limits with max_rows + min_rows constraint (deprecate repr_rows)
#1367
Changes from all commits
fa9f257
0563f6c
168eda8
0ad2621
a7dfd3f
61db037
69bcf6f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -58,7 +58,7 @@ You can customize how DataFrames are rendered by configuring the formatter: | |
| max_height=300, # Maximum height in pixels | ||
| max_memory_bytes=2097152, # Maximum memory for rendering (2MB) | ||
| min_rows_display=10, # Minimum number of rows to display (must be <= max_rows) | ||
| repr_rows=10, # Number of rows to display in __repr__ | ||
| max_rows=10, # Maximum rows to display in __repr__ | ||
| enable_cell_expansion=True,# Allow expanding truncated cells | ||
| custom_css=None, # Additional custom CSS | ||
| show_truncation_message=True, # Show message when data is truncated | ||
|
|
@@ -191,7 +191,7 @@ You can control how much data is displayed and how much memory is used for rende | |
| configure_formatter( | ||
| max_memory_bytes=4 * 1024 * 1024, # 4MB maximum memory for display | ||
| min_rows_display=50, # Always show at least 50 rows | ||
| repr_rows=20 # Show 20 rows in __repr__ output | ||
|
Comment on lines
193
to
-194
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same as above, difference between |
||
| max_rows=100 # Show at most 100 rows in __repr__ output (must be >= min_rows_display) | ||
| ) | ||
|
|
||
| These parameters help balance comprehensive data display against performance considerations. | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,6 +18,7 @@ | |
|
|
||
| from __future__ import annotations | ||
|
|
||
| import warnings | ||
| from typing import ( | ||
| TYPE_CHECKING, | ||
| Any, | ||
|
|
@@ -61,6 +62,93 @@ def _validate_bool(value: Any, param_name: str) -> None: | |
| raise TypeError(msg) | ||
|
|
||
|
|
||
def _validate_formatter_parameters(
    max_cell_length: int,
    max_width: int,
    max_height: int,
    max_memory_bytes: int,
    min_rows_display: int,
    max_rows: int | None,
    repr_rows: int | None,
    enable_cell_expansion: bool,
    show_truncation_message: bool,
    use_shared_styles: bool,
    custom_css: str | None,
    style_provider: Any,
) -> int:
    """Check every formatter setting and resolve the effective row limit.

    Checks run in a fixed order: size limits, the deprecated ``repr_rows``
    alias, the resolved ``max_rows``, the ``min_rows_display <= max_rows``
    constraint, boolean flags, ``custom_css`` and finally ``style_provider``.
    The first failing check raises.

    Args:
        max_cell_length: Maximum cell length; checked as a positive integer.
        max_width: Maximum table width; checked as a positive integer.
        max_height: Maximum table height; checked as a positive integer.
        max_memory_bytes: Memory budget; checked as a positive integer.
        min_rows_display: Minimum rows to display; checked as a positive
            integer and required to be no greater than the resolved
            ``max_rows``.
        max_rows: Maximum rows to display, or ``None`` to fall back to 10.
        repr_rows: Deprecated alias for ``max_rows``; ``None`` when unused.
        enable_cell_expansion: Flag checked with ``_validate_bool``.
        show_truncation_message: Flag checked with ``_validate_bool``.
        use_shared_styles: Flag checked with ``_validate_bool``.
        custom_css: Extra CSS; must be ``None`` or a string.
        style_provider: Must be ``None`` or an instance of ``StyleProvider``.

    Returns:
        The row limit to use: ``repr_rows`` when it was supplied, otherwise
        ``max_rows``, otherwise the default of 10.

    Raises:
        ValueError: If ``repr_rows`` and ``max_rows`` are both given with
            different values, or if ``min_rows_display`` exceeds the resolved
            row limit. The positive-integer helper may also raise.
        TypeError: If ``custom_css`` is neither ``None`` nor a string, or if
            ``style_provider`` is neither ``None`` nor a ``StyleProvider``.
            The boolean helper may also raise.

    Warns:
        DeprecationWarning: Emitted (not raised) whenever ``repr_rows`` is
            supplied.
    """
    # Size limits first, in the same order as the signature.
    size_limits = (
        (max_cell_length, "max_cell_length"),
        (max_width, "max_width"),
        (max_height, "max_height"),
        (max_memory_bytes, "max_memory_bytes"),
        (min_rows_display, "min_rows_display"),
    )
    for candidate, label in size_limits:
        _validate_positive_int(candidate, label)

    row_limit = max_rows

    # The deprecated alias wins when supplied, but may not contradict an
    # explicitly supplied max_rows.
    if repr_rows is not None:
        # NOTE(review): stacklevel=4 presumably skips this helper, the
        # constructor and one wrapper frame so the warning points at user
        # code - confirm against the actual call paths.
        warnings.warn(
            "repr_rows parameter is deprecated, use max_rows instead",
            DeprecationWarning,
            stacklevel=4,
        )
        _validate_positive_int(repr_rows, "repr_rows")
        if not (row_limit is None or repr_rows == row_limit):
            msg = "Cannot specify both repr_rows and max_rows; use max_rows only"
            raise ValueError(msg)
        row_limit = repr_rows

    # Neither name was supplied: fall back to the default row limit.
    if row_limit is None:
        row_limit = 10

    _validate_positive_int(row_limit, "max_rows")

    # The minimum can never exceed the maximum.
    if min_rows_display > row_limit:
        msg = "min_rows_display must be less than or equal to max_rows"
        raise ValueError(msg)

    flags = (
        (enable_cell_expansion, "enable_cell_expansion"),
        (show_truncation_message, "show_truncation_message"),
        (use_shared_styles, "use_shared_styles"),
    )
    for flag, label in flags:
        _validate_bool(flag, label)

    if not (custom_css is None or isinstance(custom_css, str)):
        msg = "custom_css must be None or a string"
        raise TypeError(msg)

    if not (style_provider is None or isinstance(style_provider, StyleProvider)):
        msg = "style_provider must implement the StyleProvider protocol"
        raise TypeError(msg)

    return row_limit
|
|
||
|
|
||
| @runtime_checkable | ||
| class CellFormatter(Protocol): | ||
| """Protocol for cell value formatters.""" | ||
|
|
@@ -126,8 +214,9 @@ class DataFrameHtmlFormatter: | |
| max_width: Maximum width of the HTML table in pixels | ||
| max_height: Maximum height of the HTML table in pixels | ||
| max_memory_bytes: Maximum memory in bytes for rendered data (default: 2MB) | ||
| min_rows_display: Minimum number of rows to display | ||
| repr_rows: Default number of rows to display in repr output | ||
| min_rows_display: Minimum number of rows to display (must be <= max_rows) | ||
| max_rows: Maximum number of rows to display in repr output | ||
| repr_rows: Deprecated alias for max_rows | ||
| enable_cell_expansion: Whether to add expand/collapse buttons for long cell | ||
| values | ||
| custom_css: Additional CSS to include in the HTML output | ||
|
|
@@ -143,8 +232,9 @@ def __init__( | |
| max_width: int = 1000, | ||
| max_height: int = 300, | ||
| max_memory_bytes: int = 2 * 1024 * 1024, # 2 MB | ||
| min_rows_display: int = 20, | ||
| repr_rows: int = 10, | ||
| min_rows_display: int = 10, | ||
| max_rows: int | None = None, | ||
| repr_rows: int | None = None, | ||
| enable_cell_expansion: bool = True, | ||
| custom_css: str | None = None, | ||
| show_truncation_message: bool = True, | ||
|
|
@@ -163,10 +253,13 @@ def __init__( | |
| Maximum height of the displayed table in pixels. | ||
| max_memory_bytes : int, default 2097152 (2MB) | ||
| Maximum memory in bytes for rendered data. | ||
| min_rows_display : int, default 20 | ||
| Minimum number of rows to display. | ||
| repr_rows : int, default 10 | ||
| Default number of rows to display in repr output. | ||
| min_rows_display : int, default 10 | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's not about this PR per se, but maybe this is an opportunity to tighten up the comments here. We're repeating ourselves with the types and defaults. Those are already in the type hints. I think it's becoming customary to not duplicate that information and the argument line is the preferred place to keep it. That way we don't have to worry about maintaining the values in two places. |
||
| Minimum number of rows to display. Must be less than or equal to | ||
| ``max_rows``. | ||
| max_rows : int, default 10 | ||
| Maximum number of rows to display in repr output. | ||
| repr_rows : int, optional | ||
| Deprecated alias for ``max_rows``. Use ``max_rows`` instead. | ||
| enable_cell_expansion : bool, default True | ||
| Whether to allow cells to expand when clicked. | ||
| custom_css : str, optional | ||
|
|
@@ -183,43 +276,36 @@ def __init__( | |
| ------ | ||
| ValueError | ||
| If max_cell_length, max_width, max_height, max_memory_bytes, | ||
| min_rows_display, or repr_rows is not a positive integer. | ||
| min_rows_display or max_rows is not a positive integer. | ||
| TypeError | ||
| If enable_cell_expansion, show_truncation_message, or use_shared_styles is | ||
| not a boolean, | ||
| or if custom_css is provided but is not a string, | ||
| or if style_provider is provided but does not implement the StyleProvider | ||
| protocol. | ||
| """ | ||
| # Validate numeric parameters | ||
| _validate_positive_int(max_cell_length, "max_cell_length") | ||
| _validate_positive_int(max_width, "max_width") | ||
| _validate_positive_int(max_height, "max_height") | ||
| _validate_positive_int(max_memory_bytes, "max_memory_bytes") | ||
| _validate_positive_int(min_rows_display, "min_rows_display") | ||
| _validate_positive_int(repr_rows, "repr_rows") | ||
|
|
||
| # Validate boolean parameters | ||
| _validate_bool(enable_cell_expansion, "enable_cell_expansion") | ||
| _validate_bool(show_truncation_message, "show_truncation_message") | ||
| _validate_bool(use_shared_styles, "use_shared_styles") | ||
|
|
||
| # Validate custom_css | ||
| if custom_css is not None and not isinstance(custom_css, str): | ||
| msg = "custom_css must be None or a string" | ||
| raise TypeError(msg) | ||
|
|
||
| # Validate style_provider | ||
| if style_provider is not None and not isinstance(style_provider, StyleProvider): | ||
| msg = "style_provider must implement the StyleProvider protocol" | ||
| raise TypeError(msg) | ||
| # Validate all parameters and get resolved max_rows | ||
| resolved_max_rows = _validate_formatter_parameters( | ||
| max_cell_length, | ||
| max_width, | ||
| max_height, | ||
| max_memory_bytes, | ||
| min_rows_display, | ||
| max_rows, | ||
| repr_rows, | ||
| enable_cell_expansion, | ||
| show_truncation_message, | ||
| use_shared_styles, | ||
| custom_css, | ||
| style_provider, | ||
| ) | ||
|
|
||
| self.max_cell_length = max_cell_length | ||
| self.max_width = max_width | ||
| self.max_height = max_height | ||
| self.max_memory_bytes = max_memory_bytes | ||
| self.min_rows_display = min_rows_display | ||
| self.repr_rows = repr_rows | ||
| self._max_rows = resolved_max_rows | ||
| self.enable_cell_expansion = enable_cell_expansion | ||
| self.custom_css = custom_css | ||
| self.show_truncation_message = show_truncation_message | ||
|
|
@@ -231,6 +317,55 @@ def __init__( | |
| self._custom_cell_builder: Callable[[Any, int, int, str], str] | None = None | ||
| self._custom_header_builder: Callable[[Any], str] | None = None | ||
|
|
||
@property
def max_rows(self) -> int:
    """Get the maximum number of rows to display.

    Returns:
        The maximum number of rows to display in repr output
    """
    return self._max_rows


@max_rows.setter
def max_rows(self, value: int) -> None:
    """Set the maximum number of rows to display.

    The value is checked with the same positive-integer helper the
    constructor uses, so an invalid limit cannot be assigned after
    construction.

    Args:
        value: The maximum number of rows; must be a positive integer

    Raises:
        ValueError: If ``value`` is not a positive integer (raised by
            ``_validate_positive_int``)
    """
    _validate_positive_int(value, "max_rows")
    # The min_rows_display <= max_rows relation is not re-checked here:
    # min_rows_display is a plain attribute that can be reassigned on its
    # own, so a check here would make the result depend on assignment order.
    self._max_rows = value
|
|
||
| @property | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If |
||
def repr_rows(self) -> int:
    """Return the row limit under its deprecated ``repr_rows`` name.

    .. deprecated::
        Read :attr:`max_rows` instead. This accessor exists only for
        backward compatibility and returns the same stored value.

    Returns:
        The maximum number of rows to display
    """
    row_limit = self._max_rows
    return row_limit
|
|
||
# Review note (Member) carried over from the PR discussion embedded in this
# hunk, on diff lines +351 to +354: "Same, why add for deprecated?"
@repr_rows.setter
def repr_rows(self, value: int) -> None:
    """Set the maximum number of rows using the deprecated name.

    .. deprecated::
        Use the :attr:`max_rows` setter instead. This property is provided
        for backward compatibility.

    Args:
        value: The maximum number of rows; must be a positive integer

    Raises:
        ValueError: If ``value`` is not a positive integer (raised by
            ``_validate_positive_int``)

    Warns:
        DeprecationWarning: Always, because the name is deprecated.
    """
    # stacklevel=2 attributes the warning to the code doing the assignment.
    warnings.warn(
        "repr_rows is deprecated, use max_rows instead",
        DeprecationWarning,
        stacklevel=2,
    )
    # Warn first, then validate - the same order the constructor path uses
    # for the deprecated repr_rows argument.
    _validate_positive_int(value, "repr_rows")
    self._max_rows = value
|
|
||
| def register_formatter(self, type_class: type, formatter: CellFormatter) -> None: | ||
| """Register a custom formatter for a specific data type. | ||
|
|
||
|
|
@@ -660,6 +795,7 @@ def configure_formatter(**kwargs: Any) -> None: | |
| "max_height", | ||
| "max_memory_bytes", | ||
| "min_rows_display", | ||
| "max_rows", | ||
| "repr_rows", | ||
| "enable_cell_expansion", | ||
| "custom_css", | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It looks like the default here has
`min_rows` > `max_rows`. Also, should we have consistent naming of the two? Either `min_rows` and `max_rows`, or `min_rows_display` and `max_rows_display`? I think the
`_display` suffix was differentiating what happens during a `display()` call vs `__repr__`, but I think these values get used during both calls.