Skip to content

Commit

Permalink
Raise NotImplementedError if .merge(suffixes=) introduces duplicate labels (#17905)
Browse files (browse the repository at this point in the history)

closes #17902

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Bradley Dice (https://github.com/bdice)

URL: #17905
  • Loading branch information
mroeschke authored Feb 4, 2025
1 parent ece6e3f commit df5c943
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 1 deletion.
9 changes: 8 additions & 1 deletion python/cudf/cudf/core/join/join.py
Original file line number Diff line number Diff line change
Expand Up @@ -372,7 +372,14 @@ def _merge_results(
for name, col in right_result._column_labels_and_values:
if name in common_names:
if name not in self._key_columns_with_same_name:
data[f"{name}{self.rsuffix}"] = col
r_label = f"{name}{self.rsuffix}"
if r_label in data:
raise NotImplementedError(
f"suffixes={(self.lsuffix, self.rsuffix)} would introduce a "
f"duplicate column label, '{r_label}', which is "
"not supported."
)
data[r_label] = col
else:
data[name] = col

Expand Down
12 changes: 12 additions & 0 deletions python/cudf/cudf/tests/test_joining.py
Original file line number Diff line number Diff line change
Expand Up @@ -2290,3 +2290,15 @@ def test_merge_index_on_opposite_how_column_reset_index():
expected = pd.merge(ser, df, on="a", how="right")
result = cudf.merge(ser_cudf, df_cudf, on="a", how="right")
assert_eq(result, expected)


def test_merge_suffixes_duplicate_label_raises():
    """Check merge behavior when ``suffixes`` would duplicate a column label.

    A first self-merge with ``suffixes=("", "_right")`` must agree with
    pandas. Merging that result with the original frame again, using the
    same suffixes, must raise ``NotImplementedError``.
    """
    columns = {"a": [1, 2, 3, 4, 5], "b": [6, 6, 6, 6, 6]}
    gdf = cudf.DataFrame(columns)
    pdf = pd.DataFrame(columns)

    # First self-merge: compared against the pandas result for the same call.
    got = gdf.merge(gdf, on=["a"], suffixes=("", "_right"))
    want = pdf.merge(pdf, on=["a"], suffixes=("", "_right"))
    assert_eq(got, want)

    # Second merge reuses the suffixes on the already-merged frame; cudf is
    # expected to reject it rather than return a result.
    with pytest.raises(NotImplementedError):
        got.merge(gdf, on=["a"], suffixes=("", "_right"))

0 comments on commit df5c943

Please sign in to comment.