Skip to content

Commit 6bf7e8e

Browse files
committed
Fix union schema name coercion
1 parent 910029d commit 6bf7e8e

File tree

3 files changed

+357
-3
lines changed

3 files changed

+357
-3
lines changed

datafusion/physical-plan/src/union.rs

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -513,7 +513,10 @@ fn union_schema(inputs: &[Arc<dyn ExecutionPlan>]) -> SchemaRef {
513513

514514
let fields = (0..first_schema.fields().len())
515515
.map(|i| {
516-
inputs
516+
let base_field = first_schema.field(i).clone();
517+
518+
// Coerce metadata and nullability across all inputs
519+
let merged_field = inputs
517520
.iter()
518521
.enumerate()
519522
.map(|(input_idx, input)| {
@@ -532,9 +535,10 @@ fn union_schema(inputs: &[Arc<dyn ExecutionPlan>]) -> SchemaRef {
532535
field.with_metadata(metadata)
533536
})
534537
.find_or_first(Field::is_nullable)
535-
// We can unwrap this because if inputs was empty, this would've already panic'ed when we
536-
// indexed into inputs[0].
537538
.unwrap()
539+
.with_name(base_field.name());
540+
541+
merged_field
538542
})
539543
.collect::<Vec<_>>();
540544

datafusion/substrait/tests/cases/consumer_integration.rs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -560,4 +560,26 @@ mod tests {
560560
);
561561
Ok(())
562562
}
563+
564+
#[tokio::test]
565+
async fn test_multiple_unions() -> Result<()> {
566+
let plan_str = test_plan_to_string("multiple_unions.json").await?;
567+
assert_eq!(
568+
plan_str,
569+
"Projection: Utf8(\"people\") AS product_category, Utf8(\"people\")__temp__0 AS product_type, product_key\
570+
\n Union\
571+
\n Projection: Utf8(\"people\"), Utf8(\"people\") AS Utf8(\"people\")__temp__0, sales.product_key\
572+
\n Left Join: sales.product_key = food.@food_id\
573+
\n TableScan: sales\
574+
\n TableScan: food\
575+
\n Union\
576+
\n Projection: people.$f3, people.$f5, people.product_key0\
577+
\n Left Join: people.product_key0 = food.@food_id\
578+
\n TableScan: people\
579+
\n TableScan: food\
580+
\n TableScan: more_products"
581+
);
582+
583+
Ok(())
584+
}
563585
}
Lines changed: 328 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,328 @@
1+
{
2+
"extensionUris": [{
3+
"extensionUriAnchor": 1,
4+
"uri": "/functions_comparison.yaml"
5+
}],
6+
"extensions": [{
7+
"extensionFunction": {
8+
"extensionUriReference": 1,
9+
"functionAnchor": 0,
10+
"name": "equal:any_any"
11+
}
12+
}],
13+
"relations": [{
14+
"root": {
15+
"input": {
16+
"set": {
17+
"common": {
18+
"direct": {
19+
}
20+
},
21+
"inputs": [{
22+
"project": {
23+
"common": {
24+
"emit": {
25+
"outputMapping": [2, 3, 4]
26+
}
27+
},
28+
"input": {
29+
"join": {
30+
"common": {
31+
"direct": {
32+
}
33+
},
34+
"left": {
35+
"read": {
36+
"common": {
37+
"direct": {
38+
}
39+
},
40+
"baseSchema": {
41+
"names": ["product_key"],
42+
"struct": {
43+
"types": [{
44+
"string": {
45+
"nullability": "NULLABILITY_NULLABLE"
46+
}
47+
}],
48+
"nullability": "NULLABILITY_REQUIRED"
49+
}
50+
},
51+
"namedTable": {
52+
"names": [
53+
"sales"
54+
]
55+
}
56+
}
57+
},
58+
"right": {
59+
"read": {
60+
"common": {
61+
"direct": {
62+
}
63+
},
64+
"baseSchema": {
65+
"names": ["@food_id"],
66+
"struct": {
67+
"types": [{
68+
"string": {
69+
"nullability": "NULLABILITY_NULLABLE"
70+
}
71+
}],
72+
"nullability": "NULLABILITY_REQUIRED"
73+
}
74+
},
75+
"namedTable": {
76+
"names": [
77+
"food"
78+
]
79+
}
80+
}
81+
},
82+
"expression": {
83+
"scalarFunction": {
84+
"functionReference": 0,
85+
"outputType": {
86+
"bool": {
87+
"nullability": "NULLABILITY_NULLABLE"
88+
}
89+
},
90+
"arguments": [{
91+
"value": {
92+
"selection": {
93+
"directReference": {
94+
"structField": {
95+
"field": 0
96+
}
97+
},
98+
"rootReference": {
99+
}
100+
}
101+
}
102+
}, {
103+
"value": {
104+
"selection": {
105+
"directReference": {
106+
"structField": {
107+
"field": 1
108+
}
109+
},
110+
"rootReference": {
111+
}
112+
}
113+
}
114+
}]
115+
}
116+
},
117+
"type": "JOIN_TYPE_LEFT"
118+
}
119+
},
120+
"expressions": [{
121+
"literal": {
122+
"string": "people"
123+
}
124+
}, {
125+
"literal": {
126+
"string": "people"
127+
}
128+
}, {
129+
"selection": {
130+
"directReference": {
131+
"structField": {
132+
"field": 0
133+
}
134+
},
135+
"rootReference": {
136+
}
137+
}
138+
}]
139+
}
140+
}, {
141+
"set": {
142+
"common": {
143+
"direct": {
144+
}
145+
},
146+
"inputs": [{
147+
"project": {
148+
"common": {
149+
"emit": {
150+
"outputMapping": [4, 5, 6]
151+
}
152+
},
153+
"input": {
154+
"join": {
155+
"common": {
156+
"direct": {
157+
}
158+
},
159+
"left": {
160+
"read": {
161+
"common": {
162+
"direct": {
163+
}
164+
},
165+
"baseSchema": {
166+
"names": ["$f3", "$f5", "product_key0"],
167+
"struct": {
168+
"types": [{
169+
"string": {
170+
"nullability": "NULLABILITY_REQUIRED"
171+
}
172+
}, {
173+
"string": {
174+
"nullability": "NULLABILITY_REQUIRED"
175+
}
176+
}, {
177+
"string": {
178+
"nullability": "NULLABILITY_NULLABLE"
179+
}
180+
}],
181+
"nullability": "NULLABILITY_REQUIRED"
182+
}
183+
},
184+
"namedTable": {
185+
"names": [
186+
"people"
187+
]
188+
}
189+
}
190+
},
191+
"right": {
192+
"read": {
193+
"common": {
194+
"direct": {
195+
}
196+
},
197+
"baseSchema": {
198+
"names": ["@food_id"],
199+
"struct": {
200+
"types": [{
201+
"string": {
202+
"nullability": "NULLABILITY_NULLABLE"
203+
}
204+
}],
205+
"nullability": "NULLABILITY_REQUIRED"
206+
}
207+
},
208+
"namedTable": {
209+
"names": [
210+
"food"
211+
]
212+
}
213+
214+
}
215+
},
216+
"expression": {
217+
"scalarFunction": {
218+
"functionReference": 0,
219+
"outputType": {
220+
"bool": {
221+
"nullability": "NULLABILITY_NULLABLE"
222+
}
223+
},
224+
"arguments": [{
225+
"value": {
226+
"selection": {
227+
"directReference": {
228+
"structField": {
229+
"field": 2
230+
}
231+
},
232+
"rootReference": {
233+
}
234+
}
235+
}
236+
}, {
237+
"value": {
238+
"selection": {
239+
"directReference": {
240+
"structField": {
241+
"field": 3
242+
}
243+
},
244+
"rootReference": {
245+
}
246+
}
247+
}
248+
}]
249+
}
250+
},
251+
"type": "JOIN_TYPE_LEFT"
252+
}
253+
},
254+
"expressions": [{
255+
"selection": {
256+
"directReference": {
257+
"structField": {
258+
"field": 0
259+
}
260+
},
261+
"rootReference": {
262+
}
263+
}
264+
}, {
265+
"selection": {
266+
"directReference": {
267+
"structField": {
268+
"field": 1
269+
}
270+
},
271+
"rootReference": {
272+
}
273+
}
274+
}, {
275+
"selection": {
276+
"directReference": {
277+
"structField": {
278+
"field": 2
279+
}
280+
},
281+
"rootReference": {
282+
}
283+
}
284+
}]
285+
}
286+
}, {
287+
"read": {
288+
"common": {
289+
"direct": {
290+
}
291+
},
292+
"baseSchema": {
293+
"names": ["$f1000", "$f2000", "more_products_key0000"],
294+
"struct": {
295+
"types": [{
296+
"string": {
297+
"nullability": "NULLABILITY_REQUIRED"
298+
}
299+
}, {
300+
"string": {
301+
"nullability": "NULLABILITY_REQUIRED"
302+
}
303+
}, {
304+
"string": {
305+
"nullability": "NULLABILITY_NULLABLE"
306+
}
307+
}],
308+
"nullability": "NULLABILITY_REQUIRED"
309+
}
310+
},
311+
"namedTable": {
312+
"names": [
313+
"more_products"
314+
]
315+
}
316+
317+
}
318+
}],
319+
"op": "SET_OP_UNION_ALL"
320+
}
321+
}],
322+
"op": "SET_OP_UNION_ALL"
323+
}
324+
},
325+
"names": ["product_category", "product_type", "product_key"]
326+
}
327+
}]
328+
}

0 commit comments

Comments
 (0)