Skip to content

Commit 9792bd9

Browse files
committed
Include top-level nulls in perfectly shredded child array
1 parent 88b7fca commit 9792bd9

File tree

1 file changed

+152
-4
lines changed

1 file changed

+152
-4
lines changed

parquet-variant-compute/src/variant_get.rs

Lines changed: 152 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717
use arrow::{
18-
array::{self, Array, ArrayRef, BinaryViewArray, StructArray},
18+
array::{self, Array, ArrayRef, BinaryViewArray, StructArray, make_array},
19+
buffer::NullBuffer,
1920
compute::CastOptions,
2021
datatypes::Field,
2122
error::Result,
@@ -261,8 +262,24 @@ fn try_perfect_shredding(variant_array: &VariantArray, as_field: &Field) -> Opti
261262
// 2. If every row in the `value` column is null
262263

263264
// This is a perfect shredding, where the value is entirely shredded out,
264-
// so we can just return the typed value.
265-
return Some(typed_value.clone());
265+
// so we can just return the typed value after merging the accumulated nulls.
266+
let parent_nulls = variant_array.nulls();
267+
268+
// If we have no nulls OR the shredded array is `Null`, which doesn't support external nulls.
269+
let target_array = if parent_nulls.is_none() || typed_value.data_type().is_null() {
270+
typed_value.clone()
271+
} else {
272+
let merged_nulls = NullBuffer::union(parent_nulls, typed_value.nulls());
273+
let data = typed_value
274+
.to_data()
275+
.into_builder()
276+
.nulls(merged_nulls)
277+
.build()
278+
.ok()?;
279+
make_array(data)
280+
};
281+
282+
return Some(target_array.clone());
266283
}
267284
None
268285
}
@@ -346,7 +363,7 @@ mod test {
346363
Date64Array, Decimal32Array, Decimal64Array, Decimal128Array, Decimal256Array,
347364
Float32Array, Float64Array, Int8Array, Int16Array, Int32Array, Int64Array,
348365
LargeBinaryArray, LargeListArray, LargeListViewArray, LargeStringArray, ListArray,
349-
ListViewArray, NullBuilder, StringArray, StringViewArray, StructArray,
366+
ListViewArray, NullArray, NullBuilder, StringArray, StringViewArray, StructArray,
350367
Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray, Time64NanosecondArray,
351368
};
352369
use arrow::buffer::{NullBuffer, OffsetBuffer, ScalarBuffer};
@@ -4321,4 +4338,135 @@ mod test {
43214338
);
43224339
}
43234340
}
4341+
4342+
// Generates a `#[test]` function that runs `variant_get` over a three-row variant array
// built from a `typed_value` column (no `value` column is supplied) and checks the
// result against an expected array.
//
// Arguments:
// - `$name`: identifier of the generated test function.
// - `$result_type`: `DataType` requested through `GetOptions::with_as_type`.
// - `$typed_value`: the shredded child array; wrapped in an `Arc` by the macro.
//   NOTE(review): presumably must have exactly 3 rows to match the metadata column.
// - `$parent_nulls` (optional): top-level null buffer passed to `VariantArray::from_parts`.
// - `$expected_array`: array the result is compared against with `assert_eq!`.
macro_rules! perfectly_shredded_preserves_top_level_nulls_test {
4343+
// Four-argument form: forwards to the five-argument form with a default top-level
// null buffer.
($name:ident, $result_type:expr, $typed_value:expr, $expected_array:expr) => {
4344+
perfectly_shredded_preserves_top_level_nulls_test!(
4345+
$name,
4346+
$result_type,
4347+
$typed_value,
4348+
// Default mask; the callers below expect only the middle row (index 1) to come
// back null under it.
Some(NullBuffer::from(vec![true, false, true])),
4349+
$expected_array
4350+
);
4351+
};
4352+
// Five-argument form: the caller supplies the top-level null buffer explicitly.
($name:ident, $result_type:expr, $typed_value:expr, $parent_nulls:expr, $expected_array:expr) => {
4353+
#[test]
4354+
fn $name() {
4355+
// Three copies of the empty variant metadata, one per row.
let metadata = BinaryViewArray::from_iter_values(std::iter::repeat_n(
4356+
EMPTY_VARIANT_METADATA_BYTES,
4357+
3,
4358+
));
4359+
let typed_value: ArrayRef = Arc::new($typed_value);
4360+
// The second argument is `None`, so the variant array carries only the
// typed value plus the supplied top-level nulls.
let variant_array: ArrayRef =
4361+
VariantArray::from_parts(metadata, None, Some(typed_value), $parent_nulls)
4362+
.into();
4363+
4364+
// Request the result as a nullable field named "result" of `$result_type`.
let result = variant_get(
4365+
&variant_array,
4366+
GetOptions::new().with_as_type(Some(FieldRef::from(Field::new(
4367+
"result",
4368+
$result_type,
4369+
true,
4370+
)))),
4371+
)
4372+
.unwrap();
4373+
4374+
let expected_array: ArrayRef = Arc::new($expected_array);
4375+
assert_eq!(&result, &expected_array);
4376+
}
4377+
};
4378+
}
4379+
4380+
// Int32 child with no nulls of its own, using the macro's default top-level null
// buffer: the middle row is expected to come back as null.
perfectly_shredded_preserves_top_level_nulls_test!(
4381+
test_variant_get_perfectly_shredded_integer_preserves_top_level_nulls,
4382+
DataType::Int32,
4383+
Int32Array::from(vec![Some(0_i32), Some(1_i32), Some(2_i32)]),
4384+
Int32Array::from(vec![Some(0_i32), None, Some(2_i32)])
4385+
);
4386+
4387+
// Int32 child that is already null at row 0, combined with an explicit top-level
// null buffer: the expected result is null at both row 0 and row 1.
perfectly_shredded_preserves_top_level_nulls_test!(
4388+
test_variant_get_perfectly_shredded_integer_unions_child_and_top_level_nulls,
4389+
DataType::Int32,
4390+
Int32Array::from(vec![None, Some(1_i32), Some(2_i32)]),
4391+
Some(NullBuffer::from(vec![true, false, true])),
4392+
Int32Array::from(vec![None, None, Some(2_i32)])
4393+
);
4394+
4395+
// `Null`-typed child with the macro's default top-level null buffer: the expected
// result is a plain three-row `NullArray`, identical to the input child.
perfectly_shredded_preserves_top_level_nulls_test!(
4396+
test_variant_get_perfectly_shredded_null_preserves_top_level_nulls,
4397+
DataType::Null,
4398+
NullArray::new(3),
4399+
NullArray::new(3)
4400+
);
4401+
4402+
// BinaryView child with three non-null values and the macro's default top-level null
// buffer: the middle value ("masked-null") is expected to come back as null.
perfectly_shredded_preserves_top_level_nulls_test!(
4403+
test_variant_get_perfectly_shredded_binary_view_preserves_top_level_nulls,
4404+
DataType::BinaryView,
4405+
BinaryViewArray::from(vec![
4406+
Some(b"Apache" as &[u8]),
4407+
Some(b"masked-null" as &[u8]),
4408+
Some(b"Parquet-variant" as &[u8]),
4409+
]),
4410+
BinaryViewArray::from(vec![
4411+
Some(b"Apache" as &[u8]),
4412+
None,
4413+
Some(b"Parquet-variant" as &[u8]),
4414+
])
4415+
);
4416+
4417+
// Same scenario as the BinaryView case, but with a `Binary` child and a `Binary`
// result type: the middle value is expected to come back as null.
perfectly_shredded_preserves_top_level_nulls_test!(
4418+
test_variant_get_perfectly_shredded_binary_preserves_top_level_nulls,
4419+
DataType::Binary,
4420+
BinaryArray::from(vec![
4421+
Some(b"Apache" as &[u8]),
4422+
Some(b"masked-null" as &[u8]),
4423+
Some(b"Parquet-variant" as &[u8]),
4424+
]),
4425+
BinaryArray::from(vec![
4426+
Some(b"Apache" as &[u8]),
4427+
None,
4428+
Some(b"Parquet-variant" as &[u8]),
4429+
])
4430+
);
4431+
4432+
// Decimal32 child (precision 5, scale 2) with no nulls of its own and the macro's
// default top-level null buffer: the middle row is expected to come back as null.
perfectly_shredded_preserves_top_level_nulls_test!(
4433+
test_variant_get_perfectly_shredded_decimal4_preserves_top_level_nulls,
4434+
DataType::Decimal32(5, 2),
4435+
Decimal32Array::from(vec![Some(12345), Some(23400), Some(-12342)])
4436+
.with_precision_and_scale(5, 2)
4437+
.unwrap(),
4438+
Decimal32Array::from(vec![Some(12345), None, Some(-12342)])
4439+
.with_precision_and_scale(5, 2)
4440+
.unwrap()
4441+
);
4442+
4443+
// Decimal64 child (precision 10, scale 1) with no nulls of its own and the macro's
// default top-level null buffer: the middle row is expected to come back as null.
perfectly_shredded_preserves_top_level_nulls_test!(
4444+
test_variant_get_perfectly_shredded_decimal8_preserves_top_level_nulls,
4445+
DataType::Decimal64(10, 1),
4446+
Decimal64Array::from(vec![Some(1234567809), Some(1456787000), Some(-1234561203)])
4447+
.with_precision_and_scale(10, 1)
4448+
.unwrap(),
4449+
Decimal64Array::from(vec![Some(1234567809), None, Some(-1234561203)])
4450+
.with_precision_and_scale(10, 1)
4451+
.unwrap()
4452+
);
4453+
4454+
// Decimal128 child (precision 20, scale 3) with no nulls of its own and the macro's
// default top-level null buffer: the middle row is expected to come back as null.
// The values are parsed from strings with `i128::from_str`.
perfectly_shredded_preserves_top_level_nulls_test!(
4455+
test_variant_get_perfectly_shredded_decimal16_preserves_top_level_nulls,
4456+
DataType::Decimal128(20, 3),
4457+
Decimal128Array::from(vec![
4458+
Some(i128::from_str("12345678901234567899").unwrap()),
4459+
Some(i128::from_str("23445677483748324300").unwrap()),
4460+
Some(i128::from_str("-12345678901234567899").unwrap()),
4461+
])
4462+
.with_precision_and_scale(20, 3)
4463+
.unwrap(),
4464+
Decimal128Array::from(vec![
4465+
Some(i128::from_str("12345678901234567899").unwrap()),
4466+
None,
4467+
Some(i128::from_str("-12345678901234567899").unwrap()),
4468+
])
4469+
.with_precision_and_scale(20, 3)
4470+
.unwrap()
4471+
);
43244472
}

0 commit comments

Comments
 (0)