Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -4517,7 +4517,20 @@ def _set_item(self, key, value) -> None:
Series/TimeSeries will be conformed to the DataFrames index to
ensure homogeneity.
"""
value, refs = self._sanitize_column(value)
# Check if we're setting a new column with a tuple key in a MultiIndex DataFrame
# and the value is a scalar. In this case, we need to create a Series with the
# proper name to ensure the name attribute matches the key.
if (
isinstance(key, tuple)
and isinstance(self.columns, MultiIndex)
and not is_list_like(value)
and key not in self.columns
):
# Create a Series with the proper name
value = Series([value] * len(self.index), index=self.index, name=key)
value, refs = self._sanitize_column(value)
else:
value, refs = self._sanitize_column(value)

if (
key in self.columns
Expand Down
61 changes: 57 additions & 4 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -2070,6 +2070,13 @@ def _wrap_applied_output(

result = self.obj._constructor(index=res_index, columns=data.columns)
result = result.astype(data.dtypes)

# Preserve metadata for subclassed DataFrames
if hasattr(self.obj, "_metadata"):
for attr in self.obj._metadata:
if hasattr(self.obj, attr):
setattr(result, attr, getattr(self.obj, attr))

return result

# GH12824
Expand All @@ -2081,14 +2088,29 @@ def _wrap_applied_output(
# GH57775 - Ensure that columns and dtypes from original frame are kept.
result = self.obj._constructor(columns=data.columns)
result = result.astype(data.dtypes)

# Preserve metadata for subclassed DataFrames
if hasattr(self.obj, "_metadata"):
for attr in self.obj._metadata:
if hasattr(self.obj, attr):
setattr(result, attr, getattr(self.obj, attr))

return result
elif isinstance(first_not_none, DataFrame):
return self._concat_objects(
result = self._concat_objects(
values,
not_indexed_same=not_indexed_same,
is_transform=is_transform,
)

# Preserve metadata for subclassed DataFrames
if hasattr(self.obj, "_metadata"):
for attr in self.obj._metadata:
if hasattr(self.obj, attr):
setattr(result, attr, getattr(self.obj, attr))

return result

key_index = self._grouper.result_index if self.as_index else None

if isinstance(first_not_none, (np.ndarray, Index)):
Expand All @@ -2105,28 +2127,59 @@ def _wrap_applied_output(
# (expression has type "Hashable", variable
# has type "Tuple[Any, ...]")
name = self._selection # type: ignore[assignment]
return self.obj._constructor_sliced(values, index=key_index, name=name)
result = self.obj._constructor_sliced(values, index=key_index, name=name)

# Preserve metadata for subclassed Series
if hasattr(self.obj, "_metadata"):
for attr in self.obj._metadata:
if hasattr(self.obj, attr):
setattr(result, attr, getattr(self.obj, attr))

return result
elif not isinstance(first_not_none, Series):
# values are not series or array-like but scalars
# self._selection not passed through to Series as the
# result should not take the name of original selection
# of columns
if self.as_index:
return self.obj._constructor_sliced(values, index=key_index)
result = self.obj._constructor_sliced(values, index=key_index)

# Preserve metadata for subclassed Series
if hasattr(self.obj, "_metadata"):
for attr in self.obj._metadata:
if hasattr(self.obj, attr):
setattr(result, attr, getattr(self.obj, attr))

return result
else:
result = self.obj._constructor(values, columns=[self._selection])
result = self._insert_inaxis_grouper(result)

# Preserve metadata for subclassed DataFrames
if hasattr(self.obj, "_metadata"):
for attr in self.obj._metadata:
if hasattr(self.obj, attr):
setattr(result, attr, getattr(self.obj, attr))

return result
else:
# values are Series
return self._wrap_applied_output_series(
result = self._wrap_applied_output_series(
values,
not_indexed_same,
first_not_none,
key_index,
is_transform,
)

# Preserve metadata for subclassed DataFrames/Series
if hasattr(self.obj, "_metadata"):
for attr in self.obj._metadata:
if hasattr(self.obj, attr):
setattr(result, attr, getattr(self.obj, attr))

return result

def _wrap_applied_output_series(
self,
values: list[Series],
Expand Down
57 changes: 57 additions & 0 deletions pandas/tests/frame/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -607,6 +607,63 @@ def test_setitem_multi_index(self):
df[("joe", "last")] = df[("jolie", "first")].loc[i, j]
tm.assert_frame_equal(df[("joe", "last")], df[("jolie", "first")])

def test_setitem_multiindex_scalar_indexer(self):
# GH#62135: Fix DataFrame.__setitem__ with MultiIndex columns and scalar indexer
# Test scalar key assignment with MultiIndex columns
columns = MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "a")])
df = DataFrame(np.arange(15).reshape(5, 3), columns=columns)

# Test setting new column with scalar tuple key
df[("C", "c")] = 100
expected_new = DataFrame(
np.array(
[
[0, 1, 2, 100],
[3, 4, 5, 100],
[6, 7, 8, 100],
[9, 10, 11, 100],
[12, 13, 14, 100],
],
dtype=np.int64,
),
columns=MultiIndex.from_tuples(
[("A", "a"), ("A", "b"), ("B", "a"), ("C", "c")]
),
)
tm.assert_frame_equal(df, expected_new)

# Test setting existing column with scalar tuple key
df[("A", "a")] = 999
expected_existing = expected_new.copy()
expected_existing[("A", "a")] = 999
tm.assert_frame_equal(df, expected_existing)

# Test setting with Series using scalar tuple key
series_data = Series([10, 20, 30, 40, 50], dtype=np.int64)
df[("D", "d")] = series_data
expected_series = expected_existing.copy()
expected_series[("D", "d")] = series_data
tm.assert_frame_equal(df, expected_series)

# Test with 3-level MultiIndex
columns_3level = MultiIndex.from_tuples(
[("X", "A", "1"), ("X", "A", "2"), ("Y", "B", "1")]
)
df_3level = DataFrame(np.arange(12).reshape(4, 3), columns=columns_3level)

# Test scalar assignment with 3-level MultiIndex
df_3level[("Z", "C", "3")] = 42
assert ("Z", "C", "3") in df_3level.columns
tm.assert_series_equal(
df_3level[("Z", "C", "3")],
Series([42, 42, 42, 42], name=("Z", "C", "3"), dtype=np.int64),
)

# Test Series assignment with 3-level MultiIndex
new_series = Series([1, 2, 3, 4], name=("W", "D", "4"), dtype=np.int64)
df_3level[("W", "D", "4")] = new_series
tm.assert_series_equal(df_3level[("W", "D", "4")], new_series)

@pytest.mark.parametrize(
"columns,box,expected",
[
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/frame/test_query_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ def test_query_duplicate_column_name(self, engine, parser):
}
).rename(columns={"B": "A"})

res = df.query('C == 1', engine=engine, parser=parser)
res = df.query("C == 1", engine=engine, parser=parser)

expect = DataFrame(
[[1, 1, 1]],
Expand Down Expand Up @@ -1411,7 +1411,7 @@ def test_expr_with_column_name_with_backtick_and_hash(self):
def test_expr_with_column_name_with_backtick(self):
    """query() resolves a column whose name itself contains a backtick.

    In a query expression a literal backtick inside a backtick-quoted
    name is escaped by doubling it, so ```a``b``` refers to the column
    named ``a`b`` (GH 59285).
    """
    # GH 59285
    df = DataFrame({"a`b": (1, 2, 3), "ab": (4, 5, 6)})
    # The doubled backtick inside the quoted name is the escape for a
    # literal backtick; only the "a`b" column should be filtered on.
    result = df.query("`a``b` < 2")
    # Note: Formatting checks may wrongly consider the above ``inline code``.
    # Ground truth computed by direct boolean indexing on the same column.
    expected = df[df["a`b"] < 2]
    tm.assert_frame_equal(result, expected)
Expand Down
32 changes: 32 additions & 0 deletions pandas/tests/groupby/test_groupby_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
"""
Tests for metadata preservation in groupby operations.
"""

import numpy as np

import pandas._testing as tm


class TestGroupByMetadataPreservation:
    # Regression tests for propagation of ``_metadata`` attributes from a
    # subclassed DataFrame through groupby operations.

    def test_groupby_apply_preserves_metadata(self):
        """Test that groupby.apply() preserves _metadata from subclassed DataFrame."""
        # Create a subclassed DataFrame with metadata
        # NOTE(review): relies on tm.SubclassedDataFrame declaring
        # "testattr" in its _metadata list — confirm that registration,
        # otherwise plain attribute assignment would not round-trip.
        subdf = tm.SubclassedDataFrame(
            {"X": [1, 1, 2, 2, 3], "Y": np.arange(0, 5), "Z": np.arange(10, 15)}
        )
        subdf.testattr = "test"

        # Apply groupby operation
        # include_groups=False keeps the grouping column "X" out of the
        # frame handed to np.sum, so only "Y" and "Z" are aggregated.
        result = subdf.groupby("X").apply(np.sum, axis=0, include_groups=False)

        # Check that metadata is preserved
        assert hasattr(result, "testattr"), (
            "Metadata attribute 'testattr' should be preserved"
        )
        assert result.testattr == "test", "Metadata value should be preserved"

        # Compare with equivalent operation that preserves metadata
        # NOTE(review): "expected" is never compared against "result";
        # it is a second, independent assertion that the built-in
        # aggregation path also carries the attribute. A name such as
        # "via_sum" would be less misleading.
        expected = subdf.groupby("X").sum()
        assert expected.testattr == "test", (
            "Equivalent operation should preserve metadata"
        )
Loading