CLN: Remove deprecated read_csv(delim_whitespace=) (#58668)

* CLN: Remove deprecated read_csv(delim_whitespace=)
* Clarify notes
* Fix some arrow failures
* Update doc/source/whatsnew/v3.0.0.rst

  Co-authored-by: Abdulaziz Aloqeely <52792999+Aloqeely@users.noreply.github.com>
* [pre-commit.ci] auto fixes from pre-commit.com hooks

  for more information, see https://pre-commit.ci

---------

Co-authored-by: Abdulaziz Aloqeely <52792999+Aloqeely@users.noreply.github.com>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
pandas-dev · May 15, 2024 · 0fc0336 · 0fc0336
1 parent c1234db
commit 0fc0336
Show file tree

Hide file tree

Showing 14 changed files with 50 additions and 284 deletions.
diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst
@@ -1511,7 +1511,6 @@ Currently, options unsupported by the C and pyarrow engines include:
 
 * ``sep`` other than a single character (e.g. regex separators)
 * ``skipfooter``
-* ``sep=None`` with ``delim_whitespace=False``
 
 Specifying any of the above options will produce a ``ParserWarning`` unless the
 python engine is selected explicitly using ``engine='python'``.
@@ -1526,7 +1525,6 @@ Options that are unsupported by the pyarrow engine which are not covered by the
 * ``memory_map``
 * ``dialect``
 * ``on_bad_lines``
-* ``delim_whitespace``
 * ``quoting``
 * ``lineterminator``
 * ``converters``

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -243,6 +243,7 @@ Removal of prior version deprecations/changes
 - Removed deprecated "method" and "limit" keywords from :meth:`Series.replace` and :meth:`DataFrame.replace` (:issue:`53492`)
 - Removed extension test classes ``BaseNoReduceTests``, ``BaseNumericReduceTests``, ``BaseBooleanReduceTests`` (:issue:`54663`)
 - Removed the "closed" and "normalize" keywords in :meth:`DatetimeIndex.__new__` (:issue:`52628`)
+- Removed the deprecated ``delim_whitespace`` keyword in :func:`read_csv` and :func:`read_table`; use ``sep=r"\s+"`` instead (:issue:`55569`)
 - Require :meth:`SparseDtype.fill_value` to be a valid value for the :meth:`SparseDtype.subtype` (:issue:`53043`)
 - Stopped performing dtype inference when setting an :class:`Index` into a :class:`DataFrame` (:issue:`56102`)
 - Stopped performing dtype inference in :meth:`Index.insert` with object-dtype index; this often affects the index/columns that result when setting new entries into an empty :class:`Series` or :class:`DataFrame` (:issue:`51363`)

diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py
@@ -229,7 +229,6 @@ class ParserWarning(Warning):
 
     1. `sep` other than a single character (e.g. regex separators)
     2. `skipfooter` higher than 0
-    3. `sep=None` with `delim_whitespace=False`
 
     The warning can be avoided by adding `engine='python'` as a parameter in
     `pd.read_csv` and `pd.read_table` methods.

diff --git a/pandas/io/clipboards.py b/pandas/io/clipboards.py
@@ -113,9 +113,8 @@ def read_clipboard(
         if index_length != 0:
             kwargs.setdefault("index_col", list(range(index_length)))
 
-    # Edge case where sep is specified to be None, return to default
-    if sep is None and kwargs.get("delim_whitespace") is None:
-        sep = r"\s+"
+    elif not isinstance(sep, str):
+        raise ValueError(f"{sep=} must be a string")
 
     # Regex separator currently only works with python engine.
     # Default to python if separator is multi-character (regex)

diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py
@@ -133,7 +133,6 @@ class _read_shared(TypedDict, Generic[HashableT], total=False):
         encoding_errors: str | None
         dialect: str | csv.Dialect | None
         on_bad_lines: str
-        delim_whitespace: bool | lib.NoDefault
         low_memory: bool
         memory_map: bool
         float_precision: Literal["high", "legacy", "round_trip"] | None
@@ -425,14 +424,6 @@ class _read_shared(TypedDict, Generic[HashableT], total=False):
 
         Callable for ``engine='pyarrow'``
 
-delim_whitespace : bool, default False
-    Specifies whether or not whitespace (e.g. ``' '`` or ``'\\t'``) will be
-    used as the ``sep`` delimiter. Equivalent to setting ``sep='\\s+'``. If this option
-    is set to ``True``, nothing should be passed in for the ``delimiter``
-    parameter.
-
-    .. deprecated:: 2.2.0
-        Use ``sep="\\s+"`` instead.
 low_memory : bool, default True
     Internally process the file in chunks, resulting in lower memory use
     while parsing, but possibly mixed type inference.  To ensure no mixed
@@ -558,15 +549,13 @@ class _read_shared(TypedDict, Generic[HashableT], total=False):
 
 
 class _C_Parser_Defaults(TypedDict):
-    delim_whitespace: Literal[False]
     na_filter: Literal[True]
     low_memory: Literal[True]
     memory_map: Literal[False]
     float_precision: None
 
 
 _c_parser_defaults: _C_Parser_Defaults = {
-    "delim_whitespace": False,
     "na_filter": True,
     "low_memory": True,
     "memory_map": False,
@@ -592,7 +581,6 @@ class _Fwf_Defaults(TypedDict):
     "thousands",
     "memory_map",
     "dialect",
-    "delim_whitespace",
     "quoting",
     "lineterminator",
     "converters",
@@ -818,24 +806,12 @@ def read_csv(
     # Error Handling
     on_bad_lines: str = "error",
     # Internal
-    delim_whitespace: bool | lib.NoDefault = lib.no_default,
     low_memory: bool = _c_parser_defaults["low_memory"],
     memory_map: bool = False,
     float_precision: Literal["high", "legacy", "round_trip"] | None = None,
     storage_options: StorageOptions | None = None,
     dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
 ) -> DataFrame | TextFileReader:
-    if delim_whitespace is not lib.no_default:
-        # GH#55569
-        warnings.warn(
-            "The 'delim_whitespace' keyword in pd.read_csv is deprecated and "
-            "will be removed in a future version. Use ``sep='\\s+'`` instead",
-            FutureWarning,
-            stacklevel=find_stack_level(),
-        )
-    else:
-        delim_whitespace = False
-
     # locals() should never be modified
     kwds = locals().copy()
     del kwds["filepath_or_buffer"]
@@ -844,7 +820,6 @@ def read_csv(
     kwds_defaults = _refine_defaults_read(
         dialect,
         delimiter,
-        delim_whitespace,
         engine,
         sep,
         on_bad_lines,
@@ -963,24 +938,12 @@ def read_table(
     # Error Handling
     on_bad_lines: str = "error",
     # Internal
-    delim_whitespace: bool | lib.NoDefault = lib.no_default,
     low_memory: bool = _c_parser_defaults["low_memory"],
     memory_map: bool = False,
     float_precision: Literal["high", "legacy", "round_trip"] | None = None,
     storage_options: StorageOptions | None = None,
     dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
 ) -> DataFrame | TextFileReader:
-    if delim_whitespace is not lib.no_default:
-        # GH#55569
-        warnings.warn(
-            "The 'delim_whitespace' keyword in pd.read_table is deprecated and "
-            "will be removed in a future version. Use ``sep='\\s+'`` instead",
-            FutureWarning,
-            stacklevel=find_stack_level(),
-        )
-    else:
-        delim_whitespace = False
-
     # locals() should never be modified
     kwds = locals().copy()
     del kwds["filepath_or_buffer"]
@@ -989,7 +952,6 @@ def read_table(
     kwds_defaults = _refine_defaults_read(
         dialect,
         delimiter,
-        delim_whitespace,
         engine,
         sep,
         on_bad_lines,
@@ -1296,17 +1258,10 @@ def _clean_options(
                 engine = "python"
 
         sep = options["delimiter"]
-        delim_whitespace = options["delim_whitespace"]
 
-        if sep is None and not delim_whitespace:
-            if engine in ("c", "pyarrow"):
-                fallback_reason = (
-                    f"the '{engine}' engine does not support "
-                    "sep=None with delim_whitespace=False"
-                )
-                engine = "python"
-        elif sep is not None and len(sep) > 1:
+        if sep is not None and len(sep) > 1:
             if engine == "c" and sep == r"\s+":
+                # delim_whitespace passed on to pandas._libs.parsers.TextReader
                 result["delim_whitespace"] = True
                 del result["delimiter"]
             elif engine not in ("python", "python-fwf"):
@@ -1317,9 +1272,6 @@ def _clean_options(
                     r"different from '\s+' are interpreted as regex)"
                 )
                 engine = "python"
-        elif delim_whitespace:
-            if "python" in engine:
-                result["delimiter"] = r"\s+"
         elif sep is not None:
             encodeable = True
             encoding = sys.getfilesystemencoding() or "utf-8"
@@ -1730,7 +1682,6 @@ def _stringify_na_values(na_values, floatify: bool) -> set[str | float]:
 def _refine_defaults_read(
     dialect: str | csv.Dialect | None,
     delimiter: str | None | lib.NoDefault,
-    delim_whitespace: bool,
     engine: CSVEngine | None,
     sep: str | None | lib.NoDefault,
     on_bad_lines: str | Callable,
@@ -1750,14 +1701,6 @@ def _refine_defaults_read(
         documentation for more details.
     delimiter : str or object
         Alias for sep.
-    delim_whitespace : bool
-        Specifies whether or not whitespace (e.g. ``' '`` or ``'\t'``) will be
-        used as the sep. Equivalent to setting ``sep='\\s+'``. If this option
-        is set to True, nothing should be passed in for the ``delimiter``
-        parameter.
-
-        .. deprecated:: 2.2.0
-            Use ``sep="\\s+"`` instead.
     engine : {{'c', 'python'}}
         Parser engine to use. The C engine is faster while the python engine is
         currently more feature-complete.
@@ -1777,12 +1720,6 @@ def _refine_defaults_read(
     -------
     kwds : dict
         Input parameters with correct values.
-
-    Raises
-    ------
-    ValueError :
-        If a delimiter was specified with ``sep`` (or ``delimiter``) and
-        ``delim_whitespace=True``.
     """
     # fix types for sep, delimiter to Union(str, Any)
     delim_default = defaults["delimiter"]
@@ -1813,12 +1750,6 @@ def _refine_defaults_read(
     if delimiter is None:
         delimiter = sep
 
-    if delim_whitespace and (delimiter is not lib.no_default):
-        raise ValueError(
-            "Specified a delimiter with both sep and "
-            "delim_whitespace=True; you can only specify one."
-        )
-
     if delimiter == "\n":
         raise ValueError(
             r"Specified \n as separator or delimiter. This forces the python engine "