|
15 | 15 | // specific language governing permissions and limitations |
16 | 16 | // under the License. |
17 | 17 | use arrow::{ |
18 | | - array::{self, Array, ArrayRef, BinaryViewArray, StructArray}, |
| 18 | + array::{self, Array, ArrayRef, BinaryViewArray, StructArray, make_array}, |
| 19 | + buffer::NullBuffer, |
19 | 20 | compute::CastOptions, |
20 | 21 | datatypes::Field, |
21 | 22 | error::Result, |
@@ -261,8 +262,24 @@ fn try_perfect_shredding(variant_array: &VariantArray, as_field: &Field) -> Opti |
261 | 262 | // 2. If every row in the `value` column is null |
262 | 263 |
|
263 | 264 | // This is a perfect shredding, where the value is entirely shredded out, |
264 | | - // so we can just return the typed value. |
265 | | - return Some(typed_value.clone()); |
| 265 | + // so we can just return the typed value after merging the accumulated nulls. |
| 266 | + let parent_nulls = variant_array.nulls(); |
| 267 | + |
| 268 | + // If we have no nulls OR the shredded array is `Null`, which doesn't support external nulls. |
| 269 | + let target_array = if parent_nulls.is_none() || typed_value.data_type().is_null() { |
| 270 | + typed_value.clone() |
| 271 | + } else { |
| 272 | + let merged_nulls = NullBuffer::union(parent_nulls, typed_value.nulls()); |
| 273 | + let data = typed_value |
| 274 | + .to_data() |
| 275 | + .into_builder() |
| 276 | + .nulls(merged_nulls) |
| 277 | + .build() |
| 278 | + .ok()?; |
| 279 | + make_array(data) |
| 280 | + }; |
| 281 | + |
| 282 | + return Some(target_array.clone()); |
266 | 283 | } |
267 | 284 | None |
268 | 285 | } |
@@ -346,7 +363,7 @@ mod test { |
346 | 363 | Date64Array, Decimal32Array, Decimal64Array, Decimal128Array, Decimal256Array, |
347 | 364 | Float32Array, Float64Array, Int8Array, Int16Array, Int32Array, Int64Array, |
348 | 365 | LargeBinaryArray, LargeListArray, LargeListViewArray, LargeStringArray, ListArray, |
349 | | - ListViewArray, NullBuilder, StringArray, StringViewArray, StructArray, |
| 366 | + ListViewArray, NullArray, NullBuilder, StringArray, StringViewArray, StructArray, |
350 | 367 | Time32MillisecondArray, Time32SecondArray, Time64MicrosecondArray, Time64NanosecondArray, |
351 | 368 | }; |
352 | 369 | use arrow::buffer::{NullBuffer, OffsetBuffer, ScalarBuffer}; |
@@ -4321,4 +4338,135 @@ mod test { |
4321 | 4338 | ); |
4322 | 4339 | } |
4323 | 4340 | } |
| 4341 | + |
/// Generates a `#[test]` function checking that `variant_get` on a perfectly
/// shredded three-row `VariantArray` keeps the array's top-level nulls.
///
/// Arguments, in order:
/// * test function name
/// * requested output `DataType`
/// * the `typed_value` child array (must have three rows)
/// * *(optional)* the top-level `Option<NullBuffer>`; when omitted it defaults to
///   `[true, false, true]`, which the callers' expected arrays treat as "middle
///   row is null"
/// * the array `variant_get` is expected to return
macro_rules! perfectly_shredded_preserves_top_level_nulls_test {
    // Full form: the caller supplies the top-level null buffer explicitly.
    ($test_fn:ident, $as_type:expr, $shredded:expr, $top_level_nulls:expr, $want:expr) => {
        #[test]
        fn $test_fn() {
            // One empty metadata entry per row.
            let metadata = BinaryViewArray::from_iter_values(std::iter::repeat_n(
                EMPTY_VARIANT_METADATA_BYTES,
                3,
            ));
            let shredded: ArrayRef = Arc::new($shredded);

            // `None` in the second slot: only the typed column is populated
            // (presumably the `value` column — confirm against `from_parts`).
            let input: ArrayRef =
                VariantArray::from_parts(metadata, None, Some(shredded), $top_level_nulls).into();

            let as_field = FieldRef::from(Field::new("result", $as_type, true));
            let options = GetOptions::new().with_as_type(Some(as_field));
            let actual = variant_get(&input, options).unwrap();

            let want: ArrayRef = Arc::new($want);
            assert_eq!(&actual, &want);
        }
    };
    // Short form: forwards to the full form with the default top-level nulls.
    ($test_fn:ident, $as_type:expr, $shredded:expr, $want:expr) => {
        perfectly_shredded_preserves_top_level_nulls_test!(
            $test_fn,
            $as_type,
            $shredded,
            Some(NullBuffer::from(vec![true, false, true])),
            $want
        );
    };
}
| 4379 | + |
| 4380 | + perfectly_shredded_preserves_top_level_nulls_test!( |
| 4381 | + test_variant_get_perfectly_shredded_integer_preserves_top_level_nulls, |
| 4382 | + DataType::Int32, |
| 4383 | + Int32Array::from(vec![Some(0_i32), Some(1_i32), Some(2_i32)]), |
| 4384 | + Int32Array::from(vec![Some(0_i32), None, Some(2_i32)]) |
| 4385 | + ); |
| 4386 | + |
| 4387 | + perfectly_shredded_preserves_top_level_nulls_test!( |
| 4388 | + test_variant_get_perfectly_shredded_integer_unions_child_and_top_level_nulls, |
| 4389 | + DataType::Int32, |
| 4390 | + Int32Array::from(vec![None, Some(1_i32), Some(2_i32)]), |
| 4391 | + Some(NullBuffer::from(vec![true, false, true])), |
| 4392 | + Int32Array::from(vec![None, None, Some(2_i32)]) |
| 4393 | + ); |
| 4394 | + |
| 4395 | + perfectly_shredded_preserves_top_level_nulls_test!( |
| 4396 | + test_variant_get_perfectly_shredded_null_preserves_top_level_nulls, |
| 4397 | + DataType::Null, |
| 4398 | + NullArray::new(3), |
| 4399 | + NullArray::new(3) |
| 4400 | + ); |
| 4401 | + |
| 4402 | + perfectly_shredded_preserves_top_level_nulls_test!( |
| 4403 | + test_variant_get_perfectly_shredded_binary_view_preserves_top_level_nulls, |
| 4404 | + DataType::BinaryView, |
| 4405 | + BinaryViewArray::from(vec![ |
| 4406 | + Some(b"Apache" as &[u8]), |
| 4407 | + Some(b"masked-null" as &[u8]), |
| 4408 | + Some(b"Parquet-variant" as &[u8]), |
| 4409 | + ]), |
| 4410 | + BinaryViewArray::from(vec![ |
| 4411 | + Some(b"Apache" as &[u8]), |
| 4412 | + None, |
| 4413 | + Some(b"Parquet-variant" as &[u8]), |
| 4414 | + ]) |
| 4415 | + ); |
| 4416 | + |
| 4417 | + perfectly_shredded_preserves_top_level_nulls_test!( |
| 4418 | + test_variant_get_perfectly_shredded_binary_preserves_top_level_nulls, |
| 4419 | + DataType::Binary, |
| 4420 | + BinaryArray::from(vec![ |
| 4421 | + Some(b"Apache" as &[u8]), |
| 4422 | + Some(b"masked-null" as &[u8]), |
| 4423 | + Some(b"Parquet-variant" as &[u8]), |
| 4424 | + ]), |
| 4425 | + BinaryArray::from(vec![ |
| 4426 | + Some(b"Apache" as &[u8]), |
| 4427 | + None, |
| 4428 | + Some(b"Parquet-variant" as &[u8]), |
| 4429 | + ]) |
| 4430 | + ); |
| 4431 | + |
| 4432 | + perfectly_shredded_preserves_top_level_nulls_test!( |
| 4433 | + test_variant_get_perfectly_shredded_decimal4_preserves_top_level_nulls, |
| 4434 | + DataType::Decimal32(5, 2), |
| 4435 | + Decimal32Array::from(vec![Some(12345), Some(23400), Some(-12342)]) |
| 4436 | + .with_precision_and_scale(5, 2) |
| 4437 | + .unwrap(), |
| 4438 | + Decimal32Array::from(vec![Some(12345), None, Some(-12342)]) |
| 4439 | + .with_precision_and_scale(5, 2) |
| 4440 | + .unwrap() |
| 4441 | + ); |
| 4442 | + |
| 4443 | + perfectly_shredded_preserves_top_level_nulls_test!( |
| 4444 | + test_variant_get_perfectly_shredded_decimal8_preserves_top_level_nulls, |
| 4445 | + DataType::Decimal64(10, 1), |
| 4446 | + Decimal64Array::from(vec![Some(1234567809), Some(1456787000), Some(-1234561203)]) |
| 4447 | + .with_precision_and_scale(10, 1) |
| 4448 | + .unwrap(), |
| 4449 | + Decimal64Array::from(vec![Some(1234567809), None, Some(-1234561203)]) |
| 4450 | + .with_precision_and_scale(10, 1) |
| 4451 | + .unwrap() |
| 4452 | + ); |
| 4453 | + |
| 4454 | + perfectly_shredded_preserves_top_level_nulls_test!( |
| 4455 | + test_variant_get_perfectly_shredded_decimal16_preserves_top_level_nulls, |
| 4456 | + DataType::Decimal128(20, 3), |
| 4457 | + Decimal128Array::from(vec![ |
| 4458 | + Some(i128::from_str("12345678901234567899").unwrap()), |
| 4459 | + Some(i128::from_str("23445677483748324300").unwrap()), |
| 4460 | + Some(i128::from_str("-12345678901234567899").unwrap()), |
| 4461 | + ]) |
| 4462 | + .with_precision_and_scale(20, 3) |
| 4463 | + .unwrap(), |
| 4464 | + Decimal128Array::from(vec![ |
| 4465 | + Some(i128::from_str("12345678901234567899").unwrap()), |
| 4466 | + None, |
| 4467 | + Some(i128::from_str("-12345678901234567899").unwrap()), |
| 4468 | + ]) |
| 4469 | + .with_precision_and_scale(20, 3) |
| 4470 | + .unwrap() |
| 4471 | + ); |
4324 | 4472 | } |
0 commit comments