diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 54ded38e77ef1..9d9f72700d190 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -19,7 +19,7 @@ ci: skip: [pyright, mypy] repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.12.7 + rev: v0.12.11 hooks: - id: ruff args: [--exit-non-zero-on-fix] @@ -51,7 +51,7 @@ repos: - id: cython-lint - id: double-quote-cython-strings - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v5.0.0 + rev: v6.0.0 hooks: - id: check-case-conflict - id: check-toml @@ -64,8 +64,6 @@ repos: args: [--fix=auto] exclude: ^pandas/tests/io/parser/data/utf16_ex.txt$ - id: fix-byte-order-marker - - id: fix-encoding-pragma - args: [--remove] - id: trailing-whitespace args: [--markdown-linebreak-ext=md] - repo: https://github.com/PyCQA/isort @@ -94,19 +92,19 @@ repos: - id: sphinx-lint args: ["--enable", "all", "--disable", "line-too-long"] - repo: https://github.com/pre-commit/mirrors-clang-format - rev: v20.1.8 + rev: v21.1.0 hooks: - id: clang-format files: ^pandas/_libs/src|^pandas/_libs/include args: [-i] types_or: [c, c++] - repo: https://github.com/trim21/pre-commit-mirror-meson - rev: v1.8.3 + rev: v1.9.0 hooks: - id: meson-fmt args: ['--inplace'] - repo: https://github.com/shellcheck-py/shellcheck-py - rev: v0.10.0.1 + rev: v0.11.0.1 hooks: - id: shellcheck args: ["--severity=warning"] @@ -266,6 +264,11 @@ repos: language: python entry: python scripts/validate_unwanted_patterns.py --validation-type="nodefault_used_not_only_for_typing" types: [python] + - id: unwanted-patterns-doesnt-use-pandas-warnings + name: Check that warning classes for deprecations use pandas' warning classes + language: python + entry: python scripts/validate_unwanted_patterns.py --validation-type="doesnt_use_pandas_warnings" + types: [ python ] - id: no-return-exception name: Use raise instead of return for exceptions language: pygrep diff --git a/doc/source/whatsnew/v3.0.0.rst 
b/doc/source/whatsnew/v3.0.0.rst index ffa65032e6aae..d6a547b0cd98a 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -895,6 +895,7 @@ Timedelta - Accuracy improvement in :meth:`Timedelta.to_pytimedelta` to round microseconds consistently for large nanosecond based Timedelta (:issue:`57841`) - Bug in :class:`Timedelta` constructor failing to raise when passed an invalid keyword (:issue:`53801`) - Bug in :meth:`DataFrame.cumsum` which was raising ``IndexError`` if dtype is ``timedelta64[ns]`` (:issue:`57956`) +- Bug in multiplication operations with ``timedelta64`` dtype failing to raise ``TypeError`` when multiplying by ``bool`` objects or dtypes (:issue:`58054`) Timezones ^^^^^^^^^ @@ -920,6 +921,7 @@ Conversion - Bug in :meth:`Series.astype` might modify read-only array inplace when casting to a string dtype (:issue:`57212`) - Bug in :meth:`Series.convert_dtypes` and :meth:`DataFrame.convert_dtypes` removing timezone information for objects with :class:`ArrowDtype` (:issue:`60237`) - Bug in :meth:`Series.reindex` not maintaining ``float32`` type when a ``reindex`` introduces a missing value (:issue:`45857`) +- Bug in :func:`to_datetime` and :func:`to_timedelta` with input ``None`` returning ``None`` instead of ``NaT``, inconsistent with other conversion methods (:issue:`23055`) Strings ^^^^^^^ @@ -951,6 +953,7 @@ Missing ^^^^^^^ - Bug in :meth:`DataFrame.fillna` and :meth:`Series.fillna` that would ignore the ``limit`` argument on :class:`.ExtensionArray` dtypes (:issue:`58001`) - Bug in :meth:`NA.__and__`, :meth:`NA.__or__` and :meth:`NA.__xor__` when operating with ``np.bool_`` objects (:issue:`58427`) +- Bug in ``divmod`` between :class:`NA` and ``Int64`` dtype objects (:issue:`62196`) - MultiIndex @@ -1119,6 +1122,7 @@ Other - Bug in ``Series.list`` methods not preserving the original :class:`Index`. (:issue:`58425`) - Bug in ``Series.list`` methods not preserving the original name. 
(:issue:`60522`) - Bug in ``Series.replace`` when the Series was created from an :class:`Index` and Copy-On-Write is enabled (:issue:`61622`) +- Bug in ``divmod`` and ``rdivmod`` with :class:`DataFrame`, :class:`Series`, and :class:`Index` with ``bool`` dtypes failing to raise, which was inconsistent with ``__floordiv__`` behavior (:issue:`46043`) - Bug in printing a :class:`DataFrame` with a :class:`DataFrame` stored in :attr:`DataFrame.attrs` raised a ``ValueError`` (:issue:`60455`) - Bug in printing a :class:`Series` with a :class:`DataFrame` stored in :attr:`Series.attrs` raised a ``ValueError`` (:issue:`60568`) - Fixed bug where the :class:`DataFrame` constructor misclassified array-like objects with a ``.name`` attribute as :class:`Series` or :class:`Index` (:issue:`61443`) diff --git a/pandas/_config/config.py b/pandas/_config/config.py index 50dac1925c936..f42e69a786d9f 100644 --- a/pandas/_config/config.py +++ b/pandas/_config/config.py @@ -73,6 +73,7 @@ class DeprecatedOption(NamedTuple): key: str + category: type[Warning] msg: str | None rkey: str | None removal_ver: str | None @@ -589,6 +590,7 @@ def register_option( def deprecate_option( key: str, + category: type[Warning], msg: str | None = None, rkey: str | None = None, removal_ver: str | None = None, @@ -608,6 +610,8 @@ def deprecate_option( key : str Name of the option to be deprecated. must be a fully-qualified option name (e.g "x.y.z.rkey"). + category : Warning + Warning class for the deprecation. msg : str, optional Warning message to output when the key is referenced. if no message is given a default message will be emitted. 
@@ -631,7 +635,7 @@ def deprecate_option( if key in _deprecated_options: raise OptionError(f"Option '{key}' has already been defined as deprecated.") - _deprecated_options[key] = DeprecatedOption(key, msg, rkey, removal_ver) + _deprecated_options[key] = DeprecatedOption(key, category, msg, rkey, removal_ver) # @@ -716,7 +720,7 @@ def _warn_if_deprecated(key: str) -> bool: if d.msg: warnings.warn( d.msg, - FutureWarning, + d.category, stacklevel=find_stack_level(), ) else: @@ -728,7 +732,11 @@ def _warn_if_deprecated(key: str) -> bool: else: msg += ", please refrain from using it." - warnings.warn(msg, FutureWarning, stacklevel=find_stack_level()) + warnings.warn( + msg, + d.category, + stacklevel=find_stack_level(), + ) return True return False diff --git a/pandas/_libs/src/vendored/ujson/python/objToJSON.c b/pandas/_libs/src/vendored/ujson/python/objToJSON.c index 8342dbcd1763d..fe35f9c3a1d73 100644 --- a/pandas/_libs/src/vendored/ujson/python/objToJSON.c +++ b/pandas/_libs/src/vendored/ujson/python/objToJSON.c @@ -51,6 +51,8 @@ Numeric decoder derived from TCL library #include #include +static const int CSTR_SIZE = 20; + npy_int64 get_nat(void) { return NPY_MIN_INT64; } typedef const char *(*PFN_PyTypeToUTF8)(JSOBJ obj, JSONTypeContext *ti, @@ -106,7 +108,7 @@ typedef struct __TypeContext { double doubleValue; JSINT64 longValue; - const char *cStr; + char *cStr; NpyArrContext *npyarr; PdBlockContext *pdblock; int transpose; @@ -347,7 +349,8 @@ static const char *PyDateTimeToIsoCallback(JSOBJ obj, JSONTypeContext *tc, } NPY_DATETIMEUNIT base = ((PyObjectEncoder *)tc->encoder)->datetimeUnit; - return PyDateTimeToIso(obj, base, len); + GET_TC(tc)->cStr = PyDateTimeToIso(obj, base, len); + return GET_TC(tc)->cStr; } static const char *PyTimeToJSON(JSOBJ _obj, JSONTypeContext *tc, @@ -1007,16 +1010,24 @@ static const char *List_iterGetName(JSOBJ Py_UNUSED(obj), //============================================================================= static void 
Index_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { GET_TC(tc)->index = 0; + GET_TC(tc)->cStr = PyObject_Malloc(CSTR_SIZE); + if (!GET_TC(tc)->cStr) { + PyErr_NoMemory(); + } } static int Index_iterNext(JSOBJ obj, JSONTypeContext *tc) { const Py_ssize_t index = GET_TC(tc)->index; Py_XDECREF(GET_TC(tc)->itemValue); + if (!GET_TC(tc)->cStr) { + return 0; + } + if (index == 0) { - GET_TC(tc)->cStr = "name"; + strcpy(GET_TC(tc)->cStr, "name"); GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "name"); } else if (index == 1) { - GET_TC(tc)->cStr = "data"; + strcpy(GET_TC(tc)->cStr, "data"); GET_TC(tc)->itemValue = get_values(obj); if (!GET_TC(tc)->itemValue) { return 0; @@ -1049,19 +1060,27 @@ static void Series_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder; GET_TC(tc)->index = 0; enc->outputFormat = VALUES; // for contained series + GET_TC(tc)->cStr = PyObject_Malloc(CSTR_SIZE); + if (!GET_TC(tc)->cStr) { + PyErr_NoMemory(); + } } static int Series_iterNext(JSOBJ obj, JSONTypeContext *tc) { const Py_ssize_t index = GET_TC(tc)->index; Py_XDECREF(GET_TC(tc)->itemValue); + if (!GET_TC(tc)->cStr) { + return 0; + } + if (index == 0) { - GET_TC(tc)->cStr = "name"; + strcpy(GET_TC(tc)->cStr, "name"); GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "name"); } else if (index == 1) { - GET_TC(tc)->cStr = "index"; + strcpy(GET_TC(tc)->cStr, "index"); GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "index"); } else if (index == 2) { - GET_TC(tc)->cStr = "data"; + strcpy(GET_TC(tc)->cStr, "data"); GET_TC(tc)->itemValue = get_values(obj); if (!GET_TC(tc)->itemValue) { return 0; @@ -1096,19 +1115,27 @@ static void DataFrame_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder; GET_TC(tc)->index = 0; enc->outputFormat = VALUES; // for contained series & index + GET_TC(tc)->cStr = PyObject_Malloc(CSTR_SIZE); + if (!GET_TC(tc)->cStr) { + 
PyErr_NoMemory(); + } } static int DataFrame_iterNext(JSOBJ obj, JSONTypeContext *tc) { const Py_ssize_t index = GET_TC(tc)->index; Py_XDECREF(GET_TC(tc)->itemValue); + if (!GET_TC(tc)->cStr) { + return 0; + } + if (index == 0) { - GET_TC(tc)->cStr = "columns"; + strcpy(GET_TC(tc)->cStr, "columns"); GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "columns"); } else if (index == 1) { - GET_TC(tc)->cStr = "index"; + strcpy(GET_TC(tc)->cStr, "index"); GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "index"); } else if (index == 2) { - GET_TC(tc)->cStr = "data"; + strcpy(GET_TC(tc)->cStr, "data"); Py_INCREF(obj); GET_TC(tc)->itemValue = obj; } else { @@ -1880,6 +1907,7 @@ static void Object_endTypeContext(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) { GET_TC(tc)->rowLabels = NULL; NpyArr_freeLabels(GET_TC(tc)->columnLabels, GET_TC(tc)->columnLabelsLen); GET_TC(tc)->columnLabels = NULL; + PyObject_Free(GET_TC(tc)->cStr); GET_TC(tc)->cStr = NULL; PyObject_Free(tc->prv); tc->prv = NULL; diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index ce7f288fc0238..b00e362e1309a 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -791,6 +791,10 @@ def _arith_method(self, other, op): # will be all-True, but since this is division, we want # to end up with floating dtype. result = result.astype(np.float64) + elif op_name in {"divmod", "rdivmod"}: + # GH#62196 + res = self._maybe_mask_result(result, mask) + return res, res.copy() else: # Make sure we do this before the "pow" mask checks # to get an expected exception message on shape mismatch. 
diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index cfea85a515d09..f58d0b1c0b948 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -177,7 +177,7 @@ def __init__( "'pd.options.future.infer_string = True' option globally and use " 'the "str" alias as a shorthand notation to specify a dtype ' '(instead of "string[pyarrow_numpy]").', - FutureWarning, + FutureWarning, # pdlint: ignore[warning_class] stacklevel=find_stack_level(), ) storage = "pyarrow" diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index bca7224ffc2f5..6e29848171ace 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -245,7 +245,7 @@ def _convert_bool_result(self, values, na=lib.no_default, method_name=None): warnings.warn( f"Allowing a non-bool 'na' in obj.str.{method_name} is deprecated " "and will raise in a future version.", - FutureWarning, + FutureWarning, # pdlint: ignore[warning_class] stacklevel=find_stack_level(), ) na = bool(na) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 79c3d74b39666..251d035749dea 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -472,6 +472,11 @@ def _add_offset(self, other): @unpack_zerodim_and_defer("__mul__") def __mul__(self, other) -> Self: if is_scalar(other): + if lib.is_bool(other): + raise TypeError( + f"Cannot multiply '{self.dtype}' by bool, explicitly cast to " + "integers instead" + ) # numpy will accept float and int, raise TypeError for others result = self._ndarray * other if result.dtype.kind != "m": @@ -489,6 +494,13 @@ def __mul__(self, other) -> Self: if not hasattr(other, "dtype"): # list, tuple other = np.array(other) + + if other.dtype.kind == "b": + # GH#58054 + raise TypeError( + f"Cannot multiply '{self.dtype}' by bool, explicitly cast to " + "integers instead" + ) if len(other) != len(self) and not lib.is_np_dtype(other.dtype, 
"m"): # Exclude timedelta64 here so we correctly raise TypeError # for that instead of ValueError diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index eb5c7739e5132..74f95cc7f52b4 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -1053,7 +1053,7 @@ def __new__(cls, freq) -> PeriodDtype: # noqa: PYI034 warnings.warn( "PeriodDtype[B] is deprecated and will be removed in a future " "version. Use a DatetimeIndex with freq='B' instead", - FutureWarning, + FutureWarning, # pdlint: ignore[warning_class] stacklevel=find_stack_level(), ) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 6557388d88f20..93a7de467dd97 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9139,7 +9139,7 @@ def resample( "deprecated and will be removed in a future version. " "Explicitly cast PeriodIndex to DatetimeIndex before resampling " "instead.", - FutureWarning, + FutureWarning, # pdlint: ignore[warning_class] stacklevel=find_stack_level(), ) else: @@ -10519,8 +10519,12 @@ def truncate( if ax._is_all_dates: from pandas.core.tools.datetimes import to_datetime - before = to_datetime(before) - after = to_datetime(after) + if before is not None: + # Avoid converting to NaT + before = to_datetime(before) + if after is not None: + # Avoid converting to NaT + after = to_datetime(after) if before is not None and after is not None and before > after: raise ValueError(f"Truncate: {after} must be after {before}") diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index c056c05b9b135..f9789c82a1536 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -61,6 +61,7 @@ class providing the base-class of operations. from pandas.errors import ( AbstractMethodError, DataError, + Pandas4Warning, ) from pandas.util._decorators import ( Appender, @@ -557,7 +558,7 @@ def groups(self) -> dict[Hashable, Index]: "and will be removed. 
In a future version `groups` by one element " "list will return tuple. Use ``df.groupby(by='a').groups`` " "instead of ``df.groupby(by=['a']).groups`` to avoid this warning", - FutureWarning, + Pandas4Warning, stacklevel=find_stack_level(), ) return self._grouper.groups diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py index 3a466b6fc7fc8..ecd2e2e4963d3 100644 --- a/pandas/core/ops/array_ops.py +++ b/pandas/core/ops/array_ops.py @@ -585,6 +585,8 @@ def maybe_prepare_scalar_for_op(obj, shape: Shape): roperator.rfloordiv, operator.pow, roperator.rpow, + divmod, + roperator.rdivmod, } diff --git a/pandas/core/resample.py b/pandas/core/resample.py index c4035ee941fbe..5ae88cff55d6d 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1949,7 +1949,7 @@ def _resampler_for_grouping(self): warnings.warn( "Resampling a groupby with a PeriodIndex is deprecated. " "Cast to DatetimeIndex before resampling instead.", - FutureWarning, + FutureWarning, # pdlint: ignore[warning_class] stacklevel=find_stack_level(), ) return PeriodIndexResamplerGroupby @@ -2293,11 +2293,12 @@ def _get_resampler(self, obj: NDFrame) -> Resampler: ) elif isinstance(ax, PeriodIndex): if isinstance(ax, PeriodIndex): + # TODO: Enforce in 3.0 (#53481) # GH#53481 warnings.warn( "Resampling with a PeriodIndex is deprecated. 
" "Cast index to DatetimeIndex before resampling instead.", - FutureWarning, + FutureWarning, # pdlint: ignore[warning_class] stacklevel=find_stack_level(), ) return PeriodIndexResampler( diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py index 9f6baaf691577..397fdcc5cac38 100644 --- a/pandas/core/strings/object_array.py +++ b/pandas/core/strings/object_array.py @@ -159,11 +159,12 @@ def _str_contains( upper_pat = pat.upper() f = lambda x: upper_pat in x.upper() if na is not lib.no_default and not isna(na) and not isinstance(na, bool): + # TODO: Enforce in 3.0 (#59615) # GH#59561 warnings.warn( "Allowing a non-bool 'na' in obj.str.contains is deprecated " "and will raise in a future version.", - FutureWarning, + FutureWarning, # pdlint: ignore[warning_class] stacklevel=find_stack_level(), ) return self._str_map(f, na, dtype=np.dtype("bool")) @@ -171,11 +172,12 @@ def _str_contains( def _str_startswith(self, pat, na=lib.no_default): f = lambda x: x.startswith(pat) if na is not lib.no_default and not isna(na) and not isinstance(na, bool): + # TODO: Enforce in 3.0 (#59615) # GH#59561 warnings.warn( "Allowing a non-bool 'na' in obj.str.startswith is deprecated " "and will raise in a future version.", - FutureWarning, + FutureWarning, # pdlint: ignore[warning_class] stacklevel=find_stack_level(), ) return self._str_map(f, na_value=na, dtype=np.dtype(bool)) @@ -183,11 +185,12 @@ def _str_startswith(self, pat, na=lib.no_default): def _str_endswith(self, pat, na=lib.no_default): f = lambda x: x.endswith(pat) if na is not lib.no_default and not isna(na) and not isinstance(na, bool): + # TODO: Enforce in 3.0 (#59615) # GH#59561 warnings.warn( "Allowing a non-bool 'na' in obj.str.endswith is deprecated " "and will raise in a future version.", - FutureWarning, + FutureWarning, # pdlint: ignore[warning_class] stacklevel=find_stack_level(), ) return self._str_map(f, na_value=na, dtype=np.dtype(bool)) diff --git a/pandas/core/tools/datetimes.py 
b/pandas/core/tools/datetimes.py index 040fcd02ab211..3156abc1e4bf6 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -21,6 +21,7 @@ tslib, ) from pandas._libs.tslibs import ( + NaT, OutOfBoundsDatetime, Timedelta, Timestamp, @@ -676,7 +677,7 @@ def to_datetime( unit: str | None = None, origin: str = "unix", cache: bool = True, -) -> DatetimeIndex | Series | DatetimeScalar | NaTType | None: +) -> DatetimeIndex | Series | DatetimeScalar | NaTType: """ Convert argument to datetime. @@ -989,7 +990,7 @@ def to_datetime( if exact is not lib.no_default and format in {"mixed", "ISO8601"}: raise ValueError("Cannot use 'exact' when 'format' is 'mixed' or 'ISO8601'") if arg is None: - return None + return NaT if origin != "unix": arg = _adjust_to_origin(arg, origin, unit) diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index 8d82a5c213910..dcadb9c24c213 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -182,7 +182,7 @@ def to_timedelta( raise ValueError("errors must be one of 'raise', or 'coerce'.") if arg is None: - return arg + return NaT elif isinstance(arg, ABCSeries): values = _convert_listlike(arg._values, unit=unit, errors=errors) return arg._constructor(values, index=arg.index, name=arg.name) diff --git a/pandas/tests/arithmetic/test_bool.py b/pandas/tests/arithmetic/test_bool.py new file mode 100644 index 0000000000000..3723b7042a3ce --- /dev/null +++ b/pandas/tests/arithmetic/test_bool.py @@ -0,0 +1,28 @@ +import pytest + +from pandas import ( + DataFrame, + Series, +) +import pandas._testing as tm + + +def test_divmod_bool_raises(box_with_array): + # GH#46043 // raises, so divmod should too + ser = Series([True, False]) + obj = tm.box_expected(ser, box_with_array) + + msg = "operator 'floordiv' not implemented for bool dtypes" + with pytest.raises(NotImplementedError, match=msg): + obj // obj + + if box_with_array is DataFrame: + msg = "operator 'floordiv' not 
implemented for bool dtypes" + else: + msg = "operator 'divmod' not implemented for bool dtypes" + with pytest.raises(NotImplementedError, match=msg): + divmod(obj, obj) + + # go through __rdivmod__ + with pytest.raises(NotImplementedError, match=msg): + divmod(True, obj) diff --git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index a05dbfc3e57d1..2361a353f3f8a 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -11,6 +11,7 @@ from pandas._libs.tslibs import timezones from pandas.compat import WASM from pandas.errors import OutOfBoundsDatetime +import pandas.util._test_decorators as td import pandas as pd from pandas import ( @@ -1556,6 +1557,51 @@ def test_tdi_rmul_arraylike(self, other, box_with_array): commute = tdi * other tm.assert_equal(commute, expected) + def test_td64arr_mul_bool_scalar_raises(self, box_with_array): + # GH#58054 + ser = Series(np.arange(5) * timedelta(hours=1)) + obj = tm.box_expected(ser, box_with_array) + + msg = r"Cannot multiply 'timedelta64\[ns\]' by bool" + with pytest.raises(TypeError, match=msg): + True * obj + with pytest.raises(TypeError, match=msg): + obj * True + with pytest.raises(TypeError, match=msg): + np.True_ * obj + with pytest.raises(TypeError, match=msg): + obj * np.True_ + + @pytest.mark.parametrize( + "dtype", + [ + bool, + "boolean", + pytest.param("bool[pyarrow]", marks=td.skip_if_no("pyarrow")), + ], + ) + def test_td64arr_mul_bool_raises(self, dtype, box_with_array): + # GH#58054 + ser = Series(np.arange(5) * timedelta(hours=1)) + obj = tm.box_expected(ser, box_with_array) + + other = Series(np.arange(5) < 0.5, dtype=dtype) + other = tm.box_expected(other, box_with_array) + + msg = r"Cannot multiply 'timedelta64\[ns\]' by bool" + with pytest.raises(TypeError, match=msg): + obj * other + + msg2 = msg.replace("rmul", "mul") + if dtype == "bool[pyarrow]": + # We go through ArrowEA.__mul__ which gives a different 
message + msg2 = ( + r"operation 'mul' not supported for dtype 'bool\[pyarrow\]' " + r"with dtype 'timedelta64\[ns\]'" + ) + with pytest.raises(TypeError, match=msg2): + other * obj + # ------------------------------------------------------------------ # __div__, __rdiv__ diff --git a/pandas/tests/arrays/masked/test_arithmetic.py b/pandas/tests/arrays/masked/test_arithmetic.py index ea018d2da4d26..779531d525505 100644 --- a/pandas/tests/arrays/masked/test_arithmetic.py +++ b/pandas/tests/arrays/masked/test_arithmetic.py @@ -246,3 +246,26 @@ def test_unary_op_does_not_propagate_mask(data, op): expected = result.copy(deep=True) ser[0] = None tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("dtype", ["Int64", "Int32", "Float32", "Float64"]) +def test_divmod_pdna(dtype): + # GH#62196 + ser = pd.Series([1, 2, 3], dtype=dtype) + res = divmod(pd.NA, ser) + assert isinstance(res, tuple) and len(res) == 2 + + exp = pd.Series([pd.NA, pd.NA, pd.NA], dtype=dtype) + tm.assert_series_equal(res[0], exp) + tm.assert_series_equal(res[1], exp) + + tm.assert_series_equal(res[0], pd.NA // ser) + tm.assert_series_equal(res[1], pd.NA % ser) + + res = divmod(ser, pd.NA) + assert isinstance(res, tuple) and len(res) == 2 + tm.assert_series_equal(res[0], exp) + tm.assert_series_equal(res[1], exp) + + tm.assert_series_equal(res[0], ser // pd.NA) + tm.assert_series_equal(res[1], ser % pd.NA) diff --git a/pandas/tests/config/test_config.py b/pandas/tests/config/test_config.py index a6bc40469cada..242a367dbff2b 100644 --- a/pandas/tests/config/test_config.py +++ b/pandas/tests/config/test_config.py @@ -75,14 +75,14 @@ def test_register_option(self): def test_describe_option(self): cf.register_option("a", 1, "doc") cf.register_option("b", 1, "doc2") - cf.deprecate_option("b") + cf.deprecate_option("b", FutureWarning) cf.register_option("c.d.e1", 1, "doc3") cf.register_option("c.d.e2", 1, "doc4") cf.register_option("f", 1) cf.register_option("g.h", 1) cf.register_option("k", 
2) - cf.deprecate_option("g.h", rkey="k") + cf.deprecate_option("g.h", FutureWarning, rkey="k") cf.register_option("l", "foo") # non-existent keys raise KeyError @@ -111,7 +111,8 @@ def test_describe_option(self): cf.set_option("l", "bar") assert "bar" in cf.describe_option("l", _print_desc=False) - def test_case_insensitive(self): + @pytest.mark.parametrize("category", [DeprecationWarning, FutureWarning]) + def test_case_insensitive(self, category): cf.register_option("KanBAN", 1, "doc") assert "doc" in cf.describe_option("kanbaN", _print_desc=False) @@ -124,9 +125,9 @@ def test_case_insensitive(self): with pytest.raises(OptionError, match=msg): cf.get_option("no_such_option") - cf.deprecate_option("KanBan") + cf.deprecate_option("KanBan", category) msg = "'kanban' is deprecated, please refrain from using it." - with pytest.raises(FutureWarning, match=msg): + with pytest.raises(category, match=msg): cf.get_option("kAnBaN") def test_get_option(self): @@ -285,7 +286,7 @@ def test_reset_option_all(self): def test_deprecate_option(self): # we can deprecate non-existent options - cf.deprecate_option("foo") + cf.deprecate_option("foo", FutureWarning) with tm.assert_produces_warning(FutureWarning, match="deprecated"): with pytest.raises(KeyError, match="No such keys.s.: 'foo'"): @@ -295,15 +296,15 @@ def test_deprecate_option(self): cf.register_option("b.c", "hullo", "doc2") cf.register_option("foo", "hullo", "doc2") - cf.deprecate_option("a", removal_ver="nifty_ver") + cf.deprecate_option("a", FutureWarning, removal_ver="nifty_ver") with tm.assert_produces_warning(FutureWarning, match="eprecated.*nifty_ver"): cf.get_option("a") msg = "Option 'a' has already been defined as deprecated" with pytest.raises(OptionError, match=msg): - cf.deprecate_option("a") + cf.deprecate_option("a", FutureWarning) - cf.deprecate_option("b.c", "zounds!") + cf.deprecate_option("b.c", FutureWarning, "zounds!") with tm.assert_produces_warning(FutureWarning, match="zounds!"): 
cf.get_option("b.c") @@ -313,7 +314,7 @@ def test_deprecate_option(self): assert cf.get_option("d.a") == "foo" assert cf.get_option("d.dep") == "bar" - cf.deprecate_option("d.dep", rkey="d.a") # reroute d.dep to d.a + cf.deprecate_option("d.dep", FutureWarning, rkey="d.a") # reroute d.dep to d.a with tm.assert_produces_warning(FutureWarning, match="eprecated"): assert cf.get_option("d.dep") == "foo" diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 53e9c53efebf7..77e71a5d2e4d0 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -10,7 +10,10 @@ import numpy as np import pytest -from pandas.errors import SpecificationError +from pandas.errors import ( + Pandas4Warning, + SpecificationError, +) import pandas as pd from pandas import ( @@ -545,21 +548,21 @@ def test_multiindex_columns_empty_level(self): grouped = df.groupby("to filter").groups assert grouped["A"] == [0] - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(Pandas4Warning, match=msg): grouped = df.groupby([("to filter", "")]).groups assert grouped["A"] == [0] df = DataFrame([[1, "A"], [2, "B"]], columns=midx) expected = df.groupby("to filter").groups - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(Pandas4Warning, match=msg): result = df.groupby([("to filter", "")]).groups assert result == expected df = DataFrame([[1, "A"], [2, "A"]], columns=midx) expected = df.groupby("to filter").groups - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(Pandas4Warning, match=msg): result = df.groupby([("to filter", "")]).groups tm.assert_dict_equal(result, expected) @@ -571,7 +574,7 @@ def test_groupby_multiindex_tuple(self): ) msg = "`groups` by one element list returns scalar is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): + with 
tm.assert_produces_warning(Pandas4Warning, match=msg): expected = df.groupby([("b", 1)]).groups result = df.groupby(("b", 1)).groups tm.assert_dict_equal(expected, result) @@ -583,14 +586,14 @@ def test_groupby_multiindex_tuple(self): ), ) - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(Pandas4Warning, match=msg): expected = df2.groupby([("b", "d")]).groups result = df.groupby(("b", 1)).groups tm.assert_dict_equal(expected, result) df3 = DataFrame(df.values, columns=[("a", "d"), ("b", "d"), ("b", "e"), "c"]) - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(Pandas4Warning, match=msg): expected = df3.groupby([("b", "d")]).groups result = df.groupby(("b", 1)).groups tm.assert_dict_equal(expected, result) @@ -623,7 +626,7 @@ def test_groupby_multiindex_partial_indexing_equivalence(self): tm.assert_frame_equal(expected_max, result_max) msg = "`groups` by one element list returns scalar is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(Pandas4Warning, match=msg): expected_groups = df.groupby([("a", 1)])[[("b", 1), ("b", 2)]].groups result_groups = df.groupby([("a", 1)])["b"].groups tm.assert_dict_equal(expected_groups, result_groups) @@ -737,7 +740,7 @@ def test_list_grouper_with_nat(self): # Grouper in a list grouping gb = df.groupby([grouper]) expected = {Timestamp("2011-01-01"): Index(list(range(364)))} - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(Pandas4Warning, match=msg): result = gb.groups tm.assert_dict_equal(result, expected) @@ -1019,7 +1022,7 @@ def test_groups(self, df): grouped = df.groupby(["A"]) msg = "`groups` by one element list returns scalar is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning(Pandas4Warning, match=msg): groups = grouped.groups assert groups is grouped.groups # caching works diff 
--git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index bf85199ec4f9f..8f242163f9f0c 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -531,6 +531,10 @@ def test_to_datetime_parse_timezone_keeps_name(self): class TestToDatetime: + def test_to_datetime_none(self): + # GH#23055 + assert to_datetime(None) is NaT + @pytest.mark.filterwarnings("ignore:Could not infer format") def test_to_datetime_overflow(self): # we should get an OutOfBoundsDatetime, NOT OverflowError diff --git a/pandas/tests/tools/test_to_timedelta.py b/pandas/tests/tools/test_to_timedelta.py index bfbefc50e65ba..08ad7b7fb1b93 100644 --- a/pandas/tests/tools/test_to_timedelta.py +++ b/pandas/tests/tools/test_to_timedelta.py @@ -27,6 +27,10 @@ class TestTimedeltas: + def test_to_timedelta_none(self): + # GH#23055 + assert to_timedelta(None) is pd.NaT + def test_to_timedelta_dt64_raises(self): # Passing datetime64-dtype data to TimedeltaIndex is no longer # supported GH#29794 diff --git a/pandas/tests/util/test_assert_produces_warning.py b/pandas/tests/util/test_assert_produces_warning.py index 5b917dbbe7ba7..9316f1452477c 100644 --- a/pandas/tests/util/test_assert_produces_warning.py +++ b/pandas/tests/util/test_assert_produces_warning.py @@ -38,7 +38,7 @@ def pair_different_warnings(request): def f(): - warnings.warn("f1", FutureWarning) + warnings.warn("f1", FutureWarning) # pdlint: ignore[warning_class] warnings.warn("f2", RuntimeWarning) @@ -175,7 +175,7 @@ def test_match_multiple_warnings(): # https://github.com/pandas-dev/pandas/issues/47829 category = (FutureWarning, UserWarning) with tm.assert_produces_warning(category, match=r"^Match this"): - warnings.warn("Match this", FutureWarning) + warnings.warn("Match this", FutureWarning) # pdlint: ignore[warning_class] warnings.warn("Match this too", UserWarning) @@ -185,7 +185,7 @@ def test_must_match_multiple_warnings(): msg = "Did not see expected warning of 
class 'UserWarning'" with pytest.raises(AssertionError, match=msg): with tm.assert_produces_warning(category, match=r"^Match this"): - warnings.warn("Match this", FutureWarning) + warnings.warn("Match this", FutureWarning) # pdlint: ignore[warning_class] def test_must_match_multiple_warnings_messages(): @@ -194,7 +194,7 @@ def test_must_match_multiple_warnings_messages(): msg = r"The emitted warning messages are \[UserWarning\('Not this'\)\]" with pytest.raises(AssertionError, match=msg): with tm.assert_produces_warning(category, match=r"^Match this"): - warnings.warn("Match this", FutureWarning) + warnings.warn("Match this", FutureWarning) # pdlint: ignore[warning_class] warnings.warn("Not this", UserWarning) @@ -204,7 +204,7 @@ def test_allow_partial_match_for_multiple_warnings(): with tm.assert_produces_warning( category, match=r"^Match this", must_find_all_warnings=False ): - warnings.warn("Match this", FutureWarning) + warnings.warn("Match this", FutureWarning) # pdlint: ignore[warning_class] def test_allow_partial_match_for_multiple_warnings_messages(): @@ -213,7 +213,7 @@ def test_allow_partial_match_for_multiple_warnings_messages(): with tm.assert_produces_warning( category, match=r"^Match this", must_find_all_warnings=False ): - warnings.warn("Match this", FutureWarning) + warnings.warn("Match this", FutureWarning) # pdlint: ignore[warning_class] warnings.warn("Not this", UserWarning) @@ -250,13 +250,17 @@ def test_raises_during_exception(): with pytest.raises(AssertionError, match=msg): with tm.assert_produces_warning(UserWarning): - warnings.warn("FutureWarning", FutureWarning) + warnings.warn( + "FutureWarning", FutureWarning + ) # pdlint: ignore[warning_class] raise IndexError msg = "Caused unexpected warning" with pytest.raises(AssertionError, match=msg): with tm.assert_produces_warning(None): - warnings.warn("FutureWarning", FutureWarning) + warnings.warn( + "FutureWarning", FutureWarning + ) # pdlint: ignore[warning_class] raise SystemError @@ 
-267,5 +271,7 @@ def test_passes_during_exception(): with pytest.raises(ValueError, match="Error"): with tm.assert_produces_warning(FutureWarning, match="FutureWarning"): - warnings.warn("FutureWarning", FutureWarning) + warnings.warn( + "FutureWarning", FutureWarning + ) # pdlint: ignore[warning_class] raise ValueError("Error") diff --git a/pandas/tests/util/test_rewrite_warning.py b/pandas/tests/util/test_rewrite_warning.py index f847a06d8ea8d..3db5e44d4fcea 100644 --- a/pandas/tests/util/test_rewrite_warning.py +++ b/pandas/tests/util/test_rewrite_warning.py @@ -36,4 +36,7 @@ def test_rewrite_warning(target_category, target_message, hit, new_category): with rewrite_warning( target_message, target_category, new_message, new_category ): - warnings.warn(message="Target message", category=FutureWarning) + warnings.warn( + message="Target message", + category=FutureWarning, # pdlint: ignore[warning_class] + ) diff --git a/scripts/tests/test_validate_unwanted_patterns.py b/scripts/tests/test_validate_unwanted_patterns.py index e3f5209e843cb..16a2a052910fd 100644 --- a/scripts/tests/test_validate_unwanted_patterns.py +++ b/scripts/tests/test_validate_unwanted_patterns.py @@ -296,3 +296,37 @@ def test_nodefault_used_not_only_for_typing_raises(self, data, expected) -> None fd = io.StringIO(data.strip()) result = list(validate_unwanted_patterns.nodefault_used_not_only_for_typing(fd)) assert result == expected + + +@pytest.mark.parametrize("function", ["warnings.warn", "warn"]) +@pytest.mark.parametrize("positional", [True, False]) +@pytest.mark.parametrize( + "category", + [ + "FutureWarning", + "DeprecationWarning", + "PendingDeprecationWarning", + "Pandas4Warning", + "RuntimeWarning" + ], +) +@pytest.mark.parametrize("pdlint_ignore", [True, False]) +def test_doesnt_use_pandas_warnings(function, positional, category, pdlint_ignore): + code = ( + f"{function}({' # pdlint: ignore[warning_class]' if pdlint_ignore else ''}\n" + f' "message",\n' + f" {'' if positional else 
'category='}{category},\n" + f")\n" + ) + flag_issue = ( + category in ["FutureWarning", "DeprecationWarning", "PendingDeprecationWarning"] + and not pdlint_ignore + ) + fd = io.StringIO(code) + result = list(validate_unwanted_patterns.doesnt_use_pandas_warnings(fd)) + if flag_issue: + assert len(result) == 1 + assert result[0][0] == 1 + assert result[0][1].startswith(f"Don't use {category}") + else: + assert len(result) == 0 diff --git a/scripts/validate_unwanted_patterns.py b/scripts/validate_unwanted_patterns.py index 8475747a80367..39aa0fcd759af 100755 --- a/scripts/validate_unwanted_patterns.py +++ b/scripts/validate_unwanted_patterns.py @@ -16,11 +16,15 @@ Callable, Iterable, ) +import re import sys import token import tokenize from typing import IO +DEPRECATION_WARNINGS_PATTERN = re.compile( + r"(PendingDeprecation|Deprecation|Future)Warning" +) PRIVATE_IMPORTS_TO_IGNORE: set[str] = { "_extension_array_shared_docs", "_index_shared_docs", @@ -344,6 +348,59 @@ def nodefault_used_not_only_for_typing(file_obj: IO[str]) -> Iterable[tuple[int, if isinstance(value, ast.AST) ) +def doesnt_use_pandas_warnings(file_obj: IO[str]) -> Iterable[tuple[int, str]]: + """ + Checking that pandas-specific warnings are used for deprecations. + + Parameters + ---------- + file_obj : IO + File-like object containing the Python code to validate. + + Yields + ------ + line_number : int + Line number of the warning. + msg : str + Explanation of the error. + """ + contents = file_obj.read() + lines = contents.split("\n") + tree = ast.parse(contents) + for node in ast.walk(tree): + if not isinstance(node, ast.Call): + continue + + if ( + isinstance(node.func, ast.Attribute) + and isinstance(node.func.value, ast.Name) + ): + # Check for `warnings.warn`. + if node.func.value.id != "warnings" or node.func.attr != "warn": + continue + elif isinstance(node.func, ast.Name): + # Check for just `warn` when using `from warnings import warn`. 
+ if node.func.id != "warn": + continue + if any( + "# pdlint: ignore[warning_class]" in lines[k] + for k in range(node.lineno - 1, node.end_lineno + 1) + ): + continue + values = ( + [arg.id for arg in node.args if isinstance(arg, ast.Name)] + + [kw.value.id for kw in node.keywords if kw.arg == "category"] + ) + for value in values: + matches = re.match(DEPRECATION_WARNINGS_PATTERN, value) + if matches is not None: + yield ( + node.lineno, + f"Don't use {matches[0]}, use a pandas-specific warning in " + f"pd.errors instead. You can add " + f"`# pdlint: ignore[warning_class]` to override." + ) + def main( function: Callable[[IO[str]], Iterable[tuple[int, str]]], @@ -397,6 +454,7 @@ def main( "private_import_across_module", "strings_with_wrong_placed_whitespace", "nodefault_used_not_only_for_typing", + "doesnt_use_pandas_warnings", ] parser = argparse.ArgumentParser(description="Unwanted patterns checker.")