diff --git a/.asf.yaml b/.asf.yaml
index 36f01b88a724..9214924add68 100644
--- a/.asf.yaml
+++ b/.asf.yaml
@@ -46,6 +46,8 @@ github:
         strict: true
         # don't require any jobs to pass
         contexts: []
+      required_pull_request_reviews:
+        required_approving_review_count: 1
   pull_requests:
     # enable updating head branches of pull requests
     allow_update_branch: true
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index 12e22abce06d..5762ba1ffce3 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -71,7 +71,7 @@ jobs:
     steps:
       - uses: actions/checkout@v6
       - name: Download crate docs
-        uses: actions/download-artifact@v7
+        uses: actions/download-artifact@v8
         with:
           name: crate-docs
           path: website/build
diff --git a/CHANGELOG-old.md b/CHANGELOG-old.md
index 300c1f4b2e40..273884ea1fa6 100644
--- a/CHANGELOG-old.md
+++ b/CHANGELOG-old.md
@@ -19,6 +19,202 @@ # Historical Changelog
+
+## [58.0.0](https://github.com/apache/arrow-rs/tree/58.0.0) (2026-02-19)
+
+[Full Changelog](https://github.com/apache/arrow-rs/compare/57.3.0...58.0.0)
+
+**Breaking changes:**
+
+- Remove support for List types in bit\_length kernel [\#9350](https://github.com/apache/arrow-rs/pull/9350) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([codephage2020](https://github.com/codephage2020))
+- Optimize `from_bitwise_unary_op` [\#9297](https://github.com/apache/arrow-rs/pull/9297) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan))
+- Mark `BufferBuilder::new_from_buffer` as unsafe [\#9292](https://github.com/apache/arrow-rs/pull/9292) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jefffrey](https://github.com/Jefffrey))
+- \[Variant\] Support `['fieldName']` in VariantPath parser [\#9276](https://github.com/apache/arrow-rs/pull/9276) ([klion26](https://github.com/klion26))
+- Remove parquet arrow\_cast dependency [\#9077](https://github.com/apache/arrow-rs/pull/9077) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold))
+- feat: change default behavior for Parquet `PageEncodingStats` to bitmask [\#9051](https://github.com/apache/arrow-rs/pull/9051) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([WaterWhisperer](https://github.com/WaterWhisperer))
+- \[arrow\] Minimize allocation in GenericViewArray::slice\(\) [\#9016](https://github.com/apache/arrow-rs/pull/9016) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([maxburke](https://github.com/maxburke))
+
+**Implemented enhancements:**
+
+- Avoid allocating a `Vec` in `StructBuilder` [\#9427](https://github.com/apache/arrow-rs/issues/9427)
+- Zstd context reuse [\#9401](https://github.com/apache/arrow-rs/issues/9401)
+- Optimize `from_bitwise_unary_op` [\#9364](https://github.com/apache/arrow-rs/issues/9364)
+- Support `RunEndEncoded` in ord comparator [\#9360](https://github.com/apache/arrow-rs/issues/9360)
+- Support `RunEndEncoded` arrays in `arrow-json` [\#9359](https://github.com/apache/arrow-rs/issues/9359)
+- Support `BinaryView` in `bit_length` kernel [\#9351](https://github.com/apache/arrow-rs/issues/9351)
+- Remove support for `List` types in `bit_length` kernel [\#9349](https://github.com/apache/arrow-rs/issues/9349)
+- Support roundtrip `ListView` in parquet arrow writer [\#9344](https://github.com/apache/arrow-rs/issues/9344)
+- Support `ListView` in `length` kernel [\#9343](https://github.com/apache/arrow-rs/issues/9343)
+- Support `ListView` in sort kernel [\#9341](https://github.com/apache/arrow-rs/issues/9341)
+- Add some way to create a Timestamp from a `DateTime` [\#9337](https://github.com/apache/arrow-rs/issues/9337)
+- Introduce `DataType::is_list` and `DataType::is_binary` [\#9326](https://github.com/apache/arrow-rs/issues/9326)
+- Performance of creating all null dictionary array can be improved [\#9321](https://github.com/apache/arrow-rs/issues/9321)
+- \[arrow-avro\] Add missing Arrow DataType support with `avro_custom_types` round-trip + non-custom fallbacks [\#9290](https://github.com/apache/arrow-rs/issues/9290)
+
+**Fixed bugs:**
+
+- ArrowArrayStreamReader errors on zero-column record batches [\#9394](https://github.com/apache/arrow-rs/issues/9394)
+- Regression on main \(58\): Parquet argument error: Parquet error: Required field type\_ is missing [\#9315](https://github.com/apache/arrow-rs/issues/9315) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+
+**Documentation updates:**
+
+- Improve safety documentation of the `Array` trait [\#9314](https://github.com/apache/arrow-rs/pull/9314) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- Improve docs and add build\(\) method to `{Null,Boolean,}BufferBuilder` [\#9155](https://github.com/apache/arrow-rs/pull/9155) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- Improve `ArrowReaderBuilder::with_row_filter` documentation [\#9153](https://github.com/apache/arrow-rs/pull/9153) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- docs: Improve main README.md and highlight community [\#9119](https://github.com/apache/arrow-rs/pull/9119) ([alamb](https://github.com/alamb))
+- Docs: Add additional documentation and example for `make_array` [\#9112](https://github.com/apache/arrow-rs/pull/9112) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- doc: fix link on FixedSizeListArray doc [\#9033](https://github.com/apache/arrow-rs/pull/9033) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jefffrey](https://github.com/Jefffrey))
+
+**Performance improvements:**
+
+- Replace `ArrayData` with direct Array construction [\#9338](https://github.com/apache/arrow-rs/pull/9338) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([liamzwbao](https://github.com/liamzwbao))
+- Remove some `unsafe` and allocations when creating PrimitiveArrays from Vec and `from_trusted_len_iter` [\#9299](https://github.com/apache/arrow-rs/pull/9299) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- parquet: rle skip decode loop when batch contains all max levels \(aka no nulls\) [\#9258](https://github.com/apache/arrow-rs/pull/9258) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([lyang24](https://github.com/lyang24))
+- Improve parquet BinaryView / StringView decoder performance \(up to -35%\) [\#9236](https://github.com/apache/arrow-rs/pull/9236) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Dandandan](https://github.com/Dandandan))
+- Avoid a clone when creating `BooleanArray` from ArrayData [\#9159](https://github.com/apache/arrow-rs/pull/9159) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- Avoid overallocating arrays in coalesce primitives / views [\#9132](https://github.com/apache/arrow-rs/pull/9132) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan))
+- perf: Avoid ArrayData allocation in PrimitiveArray::reinterpret\_cast [\#9129](https://github.com/apache/arrow-rs/pull/9129) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- \[Parquet\] perf: Create StructArrays directly rather than via `ArrayData` \(1% improvement\) [\#9120](https://github.com/apache/arrow-rs/pull/9120) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- Avoid clones in `make_array` for `StructArray` and `GenericByteViewArray` [\#9114](https://github.com/apache/arrow-rs/pull/9114) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- perf: optimize hex decoding in json \(1.8x faster in binary-heavy\) [\#9091](https://github.com/apache/arrow-rs/pull/9091) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Weijun-H](https://github.com/Weijun-H))
+- Speed up binary kernels \(30% faster `and` and `or`\), add `BooleanBuffer::from_bitwise_binary_op` [\#9090](https://github.com/apache/arrow-rs/pull/9090) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- perf: improve field indexing in JSON StructArrayDecoder \(1.7x speed up\) [\#9086](https://github.com/apache/arrow-rs/pull/9086) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Weijun-H](https://github.com/Weijun-H))
+- bench: added to row\_format benchmark conversion of 53 non-nested columns [\#9081](https://github.com/apache/arrow-rs/pull/9081) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton))
+- perf: improve calculating length performance for view byte array in row conversion [\#9080](https://github.com/apache/arrow-rs/pull/9080) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton))
+- perf: improve calculating length performance for nested arrays in row conversion [\#9079](https://github.com/apache/arrow-rs/pull/9079) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton))
+- perf: improve calculating length performance for `GenericByteArray` in row conversion [\#9078](https://github.com/apache/arrow-rs/pull/9078) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton))
+
+**Closed issues:**
+
+- BatchCoalescer::push\_batch panics on schema mismatch instead of returning error [\#9389](https://github.com/apache/arrow-rs/issues/9389)
+- Release arrow-rs / parquet Minor version `57.3.0` \(January 2026\) [\#9240](https://github.com/apache/arrow-rs/issues/9240)
+- \[Variant\] support `..` and `['fieldName']` syntax in the VariantPath parser [\#9050](https://github.com/apache/arrow-rs/issues/9050)
+- Support Float16 for create\_random\_array [\#9028](https://github.com/apache/arrow-rs/issues/9028)
+
+**Merged pull requests:**
+
+- Avoid allocating a `Vec` in `StructBuilder` [\#9428](https://github.com/apache/arrow-rs/pull/9428) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Fokko](https://github.com/Fokko))
+- fix: fixed trait functions clash get\_date\_time\_part\_extract\_fn \(\#8221\) [\#9424](https://github.com/apache/arrow-rs/pull/9424) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([esavier](https://github.com/esavier))
+- \[Minor\] Use per-predicate projection masks in arrow\_reader\_clickbench benchmark [\#9413](https://github.com/apache/arrow-rs/pull/9413) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Dandandan](https://github.com/Dandandan))
+- Fix `ArrowArrayStreamReader` for 0-columns record batch streams [\#9405](https://github.com/apache/arrow-rs/pull/9405) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jonded94](https://github.com/jonded94))
+- Use zstd::bulk API in IPC and Parquet with context reuse for compression and decompression [\#9400](https://github.com/apache/arrow-rs/pull/9400) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan))
+- Reproduce the issue of \#9370 in a minimal, end-to-end way [\#9399](https://github.com/apache/arrow-rs/pull/9399) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([jonded94](https://github.com/jonded94))
+- perf: optimize skipper for varint values used when projecting Avro record types [\#9397](https://github.com/apache/arrow-rs/pull/9397) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mzabaluev](https://github.com/mzabaluev))
+- fix: return error instead of panic on schema mismatch in BatchCoalescer::push\_batch [\#9390](https://github.com/apache/arrow-rs/pull/9390) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([bvolpato-dd](https://github.com/bvolpato-dd))
+- Minor: Add additional test coverage for WriterProperties::{max\_row\_group\_row\_count,max\_row\_group\_size} [\#9387](https://github.com/apache/arrow-rs/pull/9387) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- Moving invalid\_utf8 tests into a separate mod [\#9384](https://github.com/apache/arrow-rs/pull/9384) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([sdf-jkl](https://github.com/sdf-jkl))
+- Update sysinfo requirement from 0.37.1 to 0.38.1 [\#9383](https://github.com/apache/arrow-rs/pull/9383) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([dependabot[bot]](https://github.com/apps/dependabot))
+- feat: support RunEndEncoded arrays in arrow-json reader and writer [\#9379](https://github.com/apache/arrow-rs/pull/9379) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Abhisheklearn12](https://github.com/Abhisheklearn12))
+- Remove lint issues in parquet-related code. [\#9375](https://github.com/apache/arrow-rs/pull/9375) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([brunal](https://github.com/brunal))
+- Add RunEndEncoded array comparator [\#9368](https://github.com/apache/arrow-rs/pull/9368) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([codephage2020](https://github.com/codephage2020))
+- feat: support BinaryView in bit\_length kernel [\#9363](https://github.com/apache/arrow-rs/pull/9363) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Abhisheklearn12](https://github.com/Abhisheklearn12))
+- Add regression tests for Parquet large binary offset overflow [\#9361](https://github.com/apache/arrow-rs/pull/9361) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([vigneshsiva11](https://github.com/vigneshsiva11))
+- feat: add max\_row\_group\_bytes option to WriterProperties [\#9357](https://github.com/apache/arrow-rs/pull/9357) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([yonipeleg33](https://github.com/yonipeleg33))
+- doc: remove disclaimer about `ListView` not being fully supported [\#9356](https://github.com/apache/arrow-rs/pull/9356) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jefffrey](https://github.com/Jefffrey))
+- Move row\_filter async tests from parquet async reader [\#9355](https://github.com/apache/arrow-rs/pull/9355) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([sdf-jkl](https://github.com/sdf-jkl))
+- \[Parquet\] Allow setting page size per column [\#9353](https://github.com/apache/arrow-rs/pull/9353) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([XiangpengHao](https://github.com/XiangpengHao))
+- feat: Support roundtrip ListView in parquet arrow writer [\#9352](https://github.com/apache/arrow-rs/pull/9352) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([codephage2020](https://github.com/codephage2020))
+- feat: add ListView and LargeListView support to arrow-ord [\#9347](https://github.com/apache/arrow-rs/pull/9347) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([codephage2020](https://github.com/codephage2020))
+- Support ListView in length kernel [\#9346](https://github.com/apache/arrow-rs/pull/9346) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([vegarsti](https://github.com/vegarsti))
+- feat: Add from\_datetime method to Timestamp types [\#9345](https://github.com/apache/arrow-rs/pull/9345) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([codephage2020](https://github.com/codephage2020))
+- \[main\] Update version to 57.3.0, add changelog [\#9334](https://github.com/apache/arrow-rs/pull/9334) ([alamb](https://github.com/alamb))
+- build\(deps\): update pyo3 requirement from 0.27.1 to 0.28.0 [\#9331](https://github.com/apache/arrow-rs/pull/9331) ([dependabot[bot]](https://github.com/apps/dependabot))
+- Add `DataType::is_list` and `DataType::is_binary` [\#9327](https://github.com/apache/arrow-rs/pull/9327) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([AdamGS](https://github.com/AdamGS))
+- Fix string array equality when the values buffer is the same and only the offsets to access it differ [\#9325](https://github.com/apache/arrow-rs/pull/9325) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann))
+- perf: skip validation of dictionary keys if all null [\#9322](https://github.com/apache/arrow-rs/pull/9322) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([albertlockett](https://github.com/albertlockett))
+- parquet: use rwlock instead of mutex in predicate cache [\#9319](https://github.com/apache/arrow-rs/pull/9319) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([lyang24](https://github.com/lyang24))
+- nit: remove unused code [\#9318](https://github.com/apache/arrow-rs/pull/9318) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([lyang24](https://github.com/lyang24))
+- Remove unnecessary Arc\ [\#9316](https://github.com/apache/arrow-rs/pull/9316) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([scovich](https://github.com/scovich))
+- Optimize data page statistics conversion \(up to 4x\) [\#9303](https://github.com/apache/arrow-rs/pull/9303) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan))
+- \[regression\] Error with adaptive predicate pushdown: "Invalid offset in sparse column chunk data: 754, no matching page found." [\#9301](https://github.com/apache/arrow-rs/pull/9301) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([sdf-jkl](https://github.com/sdf-jkl))
+- Improve `PrimitiveArray::from_iter` perf [\#9294](https://github.com/apache/arrow-rs/pull/9294) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan))
+- Add additional Arrow type support [\#9291](https://github.com/apache/arrow-rs/pull/9291) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838))
+- fix: ensure `BufferBuilder::truncate` doesn't overset length [\#9288](https://github.com/apache/arrow-rs/pull/9288) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jefffrey](https://github.com/Jefffrey))
+- Add benchmark for row group index reader perf [\#9285](https://github.com/apache/arrow-rs/pull/9285) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([friendlymatthew](https://github.com/friendlymatthew))
+- fix union array row converter to handle non-sequential type ids [\#9283](https://github.com/apache/arrow-rs/pull/9283) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([friendlymatthew](https://github.com/friendlymatthew))
+- parquet: reduce clone in delta byte array decoder [\#9282](https://github.com/apache/arrow-rs/pull/9282) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([lyang24](https://github.com/lyang24))
+- fix: fix \[\[NULL\]\] array doesn't roundtrip in arrow-row bug [\#9275](https://github.com/apache/arrow-rs/pull/9275) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([lichuang](https://github.com/lichuang))
+- Enhance list casting, adding more cases for list views [\#9274](https://github.com/apache/arrow-rs/pull/9274) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jefffrey](https://github.com/Jefffrey))
+- \[Variant\] Add path index access tests for list [\#9273](https://github.com/apache/arrow-rs/pull/9273) ([liamzwbao](https://github.com/liamzwbao))
+- Factor out json reader's static make\_decoder args to a struct [\#9271](https://github.com/apache/arrow-rs/pull/9271) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([scovich](https://github.com/scovich))
+- make\_decoder accepts borrowed DataType instead of owned [\#9270](https://github.com/apache/arrow-rs/pull/9270) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([scovich](https://github.com/scovich))
+- Implement a more generic from\_nested\_iter method for list arrays [\#9268](https://github.com/apache/arrow-rs/pull/9268) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann))
+- Move extension type construction logic out of Field [\#9266](https://github.com/apache/arrow-rs/pull/9266) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([scovich](https://github.com/scovich))
+- fix: support casting string to f16 [\#9262](https://github.com/apache/arrow-rs/pull/9262) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jefffrey](https://github.com/Jefffrey))
+- Add additional coverage for StringViewArray comparisons [\#9257](https://github.com/apache/arrow-rs/pull/9257) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- Upgrade to object store 0.13.1 [\#9256](https://github.com/apache/arrow-rs/pull/9256) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Dandandan](https://github.com/Dandandan))
+- \[Parquet\] test adaptive predicate pushdown with skipped page [\#9251](https://github.com/apache/arrow-rs/pull/9251) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([sdf-jkl](https://github.com/sdf-jkl))
+- Speed up string view comparison \(up to 3x\) [\#9250](https://github.com/apache/arrow-rs/pull/9250) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan))
+- Add bench for LocalFileSystem [\#9248](https://github.com/apache/arrow-rs/pull/9248) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Dandandan](https://github.com/Dandandan))
+- \[Parquet\] Add test for reading/writing long UTF8 StringViews [\#9246](https://github.com/apache/arrow-rs/pull/9246) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- \[Parquet\] test adaptive predicate pushdown with skipped page [\#9243](https://github.com/apache/arrow-rs/pull/9243) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([erratic-pattern](https://github.com/erratic-pattern))
+- Add tests and fixes for schema resolution bug [\#9237](https://github.com/apache/arrow-rs/pull/9237) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838))
+- Revert "Seal Array trait \(\#9092\)", mark `Array` as `unsafe` [\#9234](https://github.com/apache/arrow-rs/pull/9234) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([gabotechs](https://github.com/gabotechs))
+- Speedup filter \(up to ~1.5x\) `FilterBuilder::Optimize`/`BitIndexIterator`/`iter_set_bits_rev` [\#9229](https://github.com/apache/arrow-rs/pull/9229) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan))
+- optimize `RowGroupIndexReader` for single row group reads [\#9226](https://github.com/apache/arrow-rs/pull/9226) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([friendlymatthew](https://github.com/friendlymatthew))
+- test: improve arrow-row fuzz tests [\#9222](https://github.com/apache/arrow-rs/pull/9222) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton))
+- arrow-cast: support packing to Dictionary\(\_, Utf8View/BinaryView\) [\#9220](https://github.com/apache/arrow-rs/pull/9220) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([ethan-tyler](https://github.com/ethan-tyler))
+- Add additional test coverage for `BatchCoalescer` push\_batch\_with\_filter [\#9218](https://github.com/apache/arrow-rs/pull/9218) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- \[Parquet\] Optimize appending max level comparison in DefinitionLevelDecoder [\#9217](https://github.com/apache/arrow-rs/pull/9217) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([jhorstmann](https://github.com/jhorstmann))
+- Remove dead code to fix clippy failure on main [\#9215](https://github.com/apache/arrow-rs/pull/9215) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- \[Parquet\] perf: reuse seeked File clone in ChunkReader::get\_read\(\) [\#9214](https://github.com/apache/arrow-rs/pull/9214) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([fvaleye](https://github.com/fvaleye))
+- fix: \[9018\]Fixed RunArray slice offsets\(row, cast, eq\) [\#9213](https://github.com/apache/arrow-rs/pull/9213) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([manishkr](https://github.com/manishkr))
+- Add benchmarks for reading struct arrays from parquet [\#9210](https://github.com/apache/arrow-rs/pull/9210) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([jhorstmann](https://github.com/jhorstmann))
+- Support casting negative scale decimals to numeric [\#9207](https://github.com/apache/arrow-rs/pull/9207) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Chiicake](https://github.com/Chiicake))
+- Deprecate `ArrowReaderOptions::with_page_index` and update API [\#9199](https://github.com/apache/arrow-rs/pull/9199) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([friendlymatthew](https://github.com/friendlymatthew))
+- arrow-ipc: add reset method to DictionaryTracker [\#9196](https://github.com/apache/arrow-rs/pull/9196) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([albertlockett](https://github.com/albertlockett))
+- Avoid a clone when creating `ListArray` from ArrayData [\#9194](https://github.com/apache/arrow-rs/pull/9194) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- Avoid a clone when creating `ListViewArray` from ArrayData [\#9193](https://github.com/apache/arrow-rs/pull/9193) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- Avoid a clone when creating `MapArray` from ArrayData [\#9192](https://github.com/apache/arrow-rs/pull/9192) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- Avoid a clone when creating `NullArray` from ArrayData [\#9191](https://github.com/apache/arrow-rs/pull/9191) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- Avoid a clone when creating `PrimitiveArray` from ArrayData [\#9190](https://github.com/apache/arrow-rs/pull/9190) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- Avoid a clone when creating `RunEndArray` from ArrayData [\#9189](https://github.com/apache/arrow-rs/pull/9189) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- Avoid a clone when creating `UnionArray` from ArrayData [\#9188](https://github.com/apache/arrow-rs/pull/9188) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- Avoid a clone when creating `FixedSizeListArray` from ArrayData [\#9187](https://github.com/apache/arrow-rs/pull/9187) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- Avoid a clone when creating `FixedSizeBinaryArray` from ArrayData [\#9186](https://github.com/apache/arrow-rs/pull/9186) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- Avoid a clone when creating `DictionaryArray` from ArrayData [\#9185](https://github.com/apache/arrow-rs/pull/9185) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- fix: take\_run return empty array instead of panic. [\#9182](https://github.com/apache/arrow-rs/pull/9182) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([thorfour](https://github.com/thorfour))
+- lint: remove unused function \(fix clippy\) [\#9178](https://github.com/apache/arrow-rs/pull/9178) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton))
+- add `#[inline]` to `BitIterator` `next` function [\#9177](https://github.com/apache/arrow-rs/pull/9177) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton))
+- Add ListView support to `arrow-row` and `arrow-ord` [\#9176](https://github.com/apache/arrow-rs/pull/9176) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([brancz](https://github.com/brancz))
+- arrow-cast: Add display formatter for ListView [\#9175](https://github.com/apache/arrow-rs/pull/9175) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([brancz](https://github.com/brancz))
+- Add BinaryFormatSupport and Row Encoder to `arrow-avro` Writer [\#9171](https://github.com/apache/arrow-rs/pull/9171) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838))
+- docs\(parquet\): move async parquet example into ArrowReaderBuilder docs [\#9167](https://github.com/apache/arrow-rs/pull/9167) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([vigneshsiva11](https://github.com/vigneshsiva11))
+- feat\(array\): add `RecordBatchStream` trait [\#9166](https://github.com/apache/arrow-rs/pull/9166) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([lidavidm](https://github.com/lidavidm))
+- refactor: streamline date64 tests [\#9165](https://github.com/apache/arrow-rs/pull/9165) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([cht42](https://github.com/cht42))
+- docs: update examples in ArrowReaderOptions to use in-memory buffers [\#9163](https://github.com/apache/arrow-rs/pull/9163) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([AndreaBozzo](https://github.com/AndreaBozzo))
+- Add Avro Reader projection API [\#9162](https://github.com/apache/arrow-rs/pull/9162) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838))
+- Avoid a clone when creating StringArray/BinaryArray from ArrayData [\#9160](https://github.com/apache/arrow-rs/pull/9160) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- fix missing utf8 check for conversion from BinaryViewArray to StringViewArray [\#9158](https://github.com/apache/arrow-rs/pull/9158) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- Minor: try and avoid an allocation creating `GenericByteViewArray` from `ArrayData` [\#9156](https://github.com/apache/arrow-rs/pull/9156) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- Add find\_nth\_set\_bit\_position [\#9151](https://github.com/apache/arrow-rs/pull/9151) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan))
+- feat: add null comparison handling in make\_comparator [\#9150](https://github.com/apache/arrow-rs/pull/9150) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Weijun-H](https://github.com/Weijun-H))
+- Uncomment part of test\_utf8\_single\_column\_reader\_test [\#9148](https://github.com/apache/arrow-rs/pull/9148) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([sdf-jkl](https://github.com/sdf-jkl))
+- arrow-ipc: Add tests for nested dicts for Map and Union arrays [\#9146](https://github.com/apache/arrow-rs/pull/9146) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([brancz](https://github.com/brancz))
+- Update ASF copyright year in NOTICE [\#9145](https://github.com/apache/arrow-rs/pull/9145) ([mohit7705](https://github.com/mohit7705))
+- Avoid panic on Date32 overflow [\#9144](https://github.com/apache/arrow-rs/pull/9144) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([cht42](https://github.com/cht42))
+- feat: add `reserve` to `Rows` [\#9142](https://github.com/apache/arrow-rs/pull/9142) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton))
+- docs\(variant\): fix VariantObject::get documentation to reflect Option return type [\#9139](https://github.com/apache/arrow-rs/pull/9139) ([mohit7705](https://github.com/mohit7705))
+- Add `BooleanBufferBuilder::extend_trusted_len` [\#9137](https://github.com/apache/arrow-rs/pull/9137) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan))
+- fix: support cast from `Null` to list view/run encoded/union types [\#9134](https://github.com/apache/arrow-rs/pull/9134) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jefffrey](https://github.com/Jefffrey))
+- Fix clippy [\#9130](https://github.com/apache/arrow-rs/pull/9130) ([alamb](https://github.com/alamb))
+- Fix IPC roundtripping dicts nested in ListViews [\#9126](https://github.com/apache/arrow-rs/pull/9126) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([brancz](https://github.com/brancz))
+- Update readme for geospatial crate [\#9124](https://github.com/apache/arrow-rs/pull/9124) ([paleolimbot](https://github.com/paleolimbot))
+- \[Parquet\] perf: Create `PrimitiveArray`s directly rather than via `ArrayData` [\#9122](https://github.com/apache/arrow-rs/pull/9122) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- \[Parquet\] perf: Create Utf8/BinaryViewArray directly rather than via `ArrayData` [\#9121](https://github.com/apache/arrow-rs/pull/9121) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
+- \[parquet\] Add row group index virtual column [\#9117](https://github.com/apache/arrow-rs/pull/9117) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([friendlymatthew](https://github.com/friendlymatthew))
+- docs\(parquet\): add example for preserving dictionary encoding [\#9116](https://github.com/apache/arrow-rs/pull/9116) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([AndreaBozzo](https://github.com/AndreaBozzo))
+- doc: add example of RowFilter usage [\#9115](https://github.com/apache/arrow-rs/pull/9115) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([sonhmai](https://github.com/sonhmai))
+- docs: Update release schedule in README.md [\#9111](https://github.com/apache/arrow-rs/pull/9111) ([alamb](https://github.com/alamb))
+- feat: add benchmarks for json parser [\#9107](https://github.com/apache/arrow-rs/pull/9107) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Weijun-H](https://github.com/Weijun-H))
+- chore: switch test from `bincode` to maintained `postcard` crate \(RUSTSEC-2025-0141 \) [\#9104](https://github.com/apache/arrow-rs/pull/9104) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- Add nullif\_kernel benchmark [\#9089](https://github.com/apache/arrow-rs/pull/9089) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
+- \[Variant\] Support Shredded Lists/Array in `variant_get` [\#9049](https://github.com/apache/arrow-rs/pull/9049) ([liamzwbao](https://github.com/liamzwbao))
+- fix:\[9018\]Fixed RunArray slice offsets [\#9036](https://github.com/apache/arrow-rs/pull/9036) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([manishkr](https://github.com/manishkr))
+- Support Float16 for create\_random\_array [\#9029](https://github.com/apache/arrow-rs/pull/9029) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([niebayes](https://github.com/niebayes))
+- fix: display `0 secs` for empty DayTime/MonthDayNano intervals [\#9023](https://github.com/apache/arrow-rs/pull/9023) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jefffrey](https://github.com/Jefffrey))
+- Add options to skip decoding `Statistics` and `SizeStatistics` in Parquet metadata [\#9008](https://github.com/apache/arrow-rs/pull/9008) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl))
+
 ## [57.3.0](https://github.com/apache/arrow-rs/tree/57.3.0) (2026-02-02)

 [Full Changelog](https://github.com/apache/arrow-rs/compare/57.2.0...57.3.0)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index e73ee8ba3356..baccdfa79cbd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -19,200 +19,123 @@ # Changelog

-## [58.0.0](https://github.com/apache/arrow-rs/tree/58.0.0) (2026-02-19)
+## [58.1.0](https://github.com/apache/arrow-rs/tree/58.1.0) (2026-03-20)

-[Full Changelog](https://github.com/apache/arrow-rs/compare/57.3.0...58.0.0)
-
-**Breaking changes:**
-
-- Remove support for List types in bit\_length kernel [\#9350](https://github.com/apache/arrow-rs/pull/9350) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([codephage2020](https://github.com/codephage2020))
-- Optimize `from_bitwise_unary_op` [\#9297](https://github.com/apache/arrow-rs/pull/9297) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan))
-- Mark `BufferBuilder::new_from_buffer` as unsafe [\#9292](https://github.com/apache/arrow-rs/pull/9292) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jefffrey](https://github.com/Jefffrey))
-- \[Variant\] Support `['fieldName']` in VariantPath parser [\#9276](https://github.com/apache/arrow-rs/pull/9276) ([klion26](https://github.com/klion26))
-- Remove parquet arrow\_cast dependency [\#9077](https://github.com/apache/arrow-rs/pull/9077) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([tustvold](https://github.com/tustvold))
-- feat: change default behavior for Parquet `PageEncodingStats` to bitmask [\#9051](https://github.com/apache/arrow-rs/pull/9051) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([WaterWhisperer](https://github.com/WaterWhisperer))
-- \[arrow\] Minimize allocation in GenericViewArray::slice\(\) [\#9016](https://github.com/apache/arrow-rs/pull/9016) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([maxburke](https://github.com/maxburke))
+[Full Changelog](https://github.com/apache/arrow-rs/compare/58.0.0...58.1.0)

 **Implemented enhancements:**

-- Avoid allocating a `Vec` in `StructBuilder` [\#9427](https://github.com/apache/arrow-rs/issues/9427)
-- Zstd context reuse [\#9401](https://github.com/apache/arrow-rs/issues/9401)
-- Optimize `from_bitwise_unary_op` [\#9364](https://github.com/apache/arrow-rs/issues/9364)
-- Support `RunEndEncoded` in ord comparator [\#9360](https://github.com/apache/arrow-rs/issues/9360)
-- Support `RunEndEncoded` arrays in `arrow-json` [\#9359](https://github.com/apache/arrow-rs/issues/9359)
-- Support `BinaryView` in `bit_length` kernel [\#9351](https://github.com/apache/arrow-rs/issues/9351)
-- Remove support for `List` types in `bit_length` kernel [\#9349](https://github.com/apache/arrow-rs/issues/9349)
-- Support roundtrip `ListView` in parquet arrow writer [\#9344](https://github.com/apache/arrow-rs/issues/9344)
-- Support `ListView` in `length` kernel [\#9343](https://github.com/apache/arrow-rs/issues/9343)
-- Support `ListView` in sort kernel [\#9341](https://github.com/apache/arrow-rs/issues/9341)
-- Add some way to create a Timestamp from a `DateTime` [\#9337](https://github.com/apache/arrow-rs/issues/9337)
-- Introduce `DataType::is_list` and `DataType::IsBinary` [\#9326](https://github.com/apache/arrow-rs/issues/9326)
-- Performance of creating all null dictionary array can be improved [\#9321](https://github.com/apache/arrow-rs/issues/9321)
-- \[arrow-avro\] Add missing Arrow DataType support with `avro_custom_types` round-trip + non-custom fallbacks [\#9290](https://github.com/apache/arrow-rs/issues/9290)
+- Reuse compression dict lz4\_block [\#9566](https://github.com/apache/arrow-rs/issues/9566)
+- \[Variant\] Add `variant_to_arrow` `Struct` type support [\#9529](https://github.com/apache/arrow-rs/issues/9529)
+- \[Variant\] Add `unshred_variant` support for `Binary` and `LargeBinary` types [\#9526](https://github.com/apache/arrow-rs/issues/9526)
+- \[Variant\] Add `shred_variant` support for `LargeUtf8` and `LargeBinary` types [\#9525](https://github.com/apache/arrow-rs/issues/9525)
+- \[Variant\] `variant_get` tests clean up [\#9517](https://github.com/apache/arrow-rs/issues/9517)
+- parquet\_variant: Support LargeUtf8 typed value in `unshred_variant` [\#9513](https://github.com/apache/arrow-rs/issues/9513)
+- parquet-variant: Support string view typed value in `unshred_variant` [\#9512](https://github.com/apache/arrow-rs/issues/9512)
+- Deprecate ArrowTimestampType::make\_value in favor of from\_naive\_datetime [\#9490](https://github.com/apache/arrow-rs/issues/9490) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Followup for support \['fieldName'\] in VariantPath [\#9478](https://github.com/apache/arrow-rs/issues/9478)
+- Speedup DELTA\_BINARY\_PACKED decoding when bitwidth is 0 [\#9476](https://github.com/apache/arrow-rs/issues/9476) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- Support CSV files encoded with charsets other than UTF-8 [\#9465](https://github.com/apache/arrow-rs/issues/9465) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Expose Avro writer schema when building the reader [\#9460](https://github.com/apache/arrow-rs/issues/9460) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Python: avoid importing pyarrow classes every time [\#9438](https://github.com/apache/arrow-rs/issues/9438)
+- Add `append_nulls` to `MapBuilder` [\#9431](https://github.com/apache/arrow-rs/issues/9431) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Add `append_non_nulls` to `StructBuilder` [\#9429](https://github.com/apache/arrow-rs/issues/9429) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Add `append_value_n` to GenericByteBuilder [\#9425](https://github.com/apache/arrow-rs/issues/9425) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Optimize `from_bitwise_binary_op` [\#9378](https://github.com/apache/arrow-rs/issues/9378) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Configurable Arrow representation of UTC timestamps for Avro reader [\#9279](https://github.com/apache/arrow-rs/issues/9279) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]

 **Fixed bugs:**

-- ArrowArrayStreamReader errors on zero-column record batches [\#9394](https://github.com/apache/arrow-rs/issues/9394)
-- Regression on main \(58\): Parquet argument error: Parquet error: Required field type\_ is missing [\#9315](https://github.com/apache/arrow-rs/issues/9315) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- MutableArrayData::extend does not copy child values for ListView arrays [\#9561](https://github.com/apache/arrow-rs/issues/9561) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- ListView interleave bug [\#9559](https://github.com/apache/arrow-rs/issues/9559) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Flight encoding panics with "no dict id for field" with nested dict arrays [\#9555](https://github.com/apache/arrow-rs/issues/9555) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)]
+- "DeltaBitPackDecoder only supports Int32Type and Int64Type" but unsigned types are supported too [\#9551](https://github.com/apache/arrow-rs/issues/9551) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- Potential overflow when calling `util::bit_mask::set_bits` \(soundness issue\) [\#9543](https://github.com/apache/arrow-rs/issues/9543) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- handle Null type in try\_merge for Struct, List, LargeList, and Union [\#9523](https://github.com/apache/arrow-rs/issues/9523) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Invalid offset in sparse column chunk data for multiple predicates [\#9516](https://github.com/apache/arrow-rs/issues/9516) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- debug\_assert\_eq! in BatchCoalescer panics in debug mode when batch\_size \< 4 [\#9506](https://github.com/apache/arrow-rs/issues/9506) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- Parquet Statistics::null\_count\_opt wrongly returns Some\(0\) when stats are missing [\#9451](https://github.com/apache/arrow-rs/issues/9451) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- Error "Not all children array length are the same!" when decoding rows spanning across page boundaries in parquet file when using `RowSelection` [\#9370](https://github.com/apache/arrow-rs/issues/9370) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)]
+- Avro schema resolution not properly supported for complex types [\#9336](https://github.com/apache/arrow-rs/issues/9336) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]

 **Documentation updates:**

-- Improve safety documentation of the `Array` trait [\#9314](https://github.com/apache/arrow-rs/pull/9314) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
-- Improve docs and add build\(\) method to `{Null,Boolean,}BufferBuilder` [\#9155](https://github.com/apache/arrow-rs/pull/9155) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
-- Improve `ArrowReaderBuilder::with_row_filter` documentation [\#9153](https://github.com/apache/arrow-rs/pull/9153) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
-- docs: Improve main README.md and highlight community [\#9119](https://github.com/apache/arrow-rs/pull/9119) ([alamb](https://github.com/alamb))
-- Docs: Add additional documentation and example for `make_array` [\#9112](https://github.com/apache/arrow-rs/pull/9112) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
-- doc: fix link on FixedSizeListArray doc [\#9033](https://github.com/apache/arrow-rs/pull/9033) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jefffrey](https://github.com/Jefffrey))
+- Update planned release schedule in README.md [\#9466](https://github.com/apache/arrow-rs/pull/9466) ([alamb](https://github.com/alamb))

 **Performance improvements:**

-- Replace `ArrayData` with direct Array construction [\#9338](https://github.com/apache/arrow-rs/pull/9338) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([liamzwbao](https://github.com/liamzwbao))
-- Remove some `unsafe` and allocations when creating PrimitiveArrays from Vec and `from_trusted_len_iter` [\#9299](https://github.com/apache/arrow-rs/pull/9299) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
-- parquet: rle skip decode loop when batch contains all max levels \(aka no nulls\) [\#9258](https://github.com/apache/arrow-rs/pull/9258) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([lyang24](https://github.com/lyang24))
-- Improve parquet BinaryView / StringView decoder performance \(up to -35%\) [\#9236](https://github.com/apache/arrow-rs/pull/9236) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Dandandan](https://github.com/Dandandan))
-- Avoid a clone when creating `BooleanArray` from ArrayData [\#9159](https://github.com/apache/arrow-rs/pull/9159) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
-- Avoid overallocating arrays in coalesce primitives / views [\#9132](https://github.com/apache/arrow-rs/pull/9132) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan))
-- perf: Avoid ArrayData allocation in PrimitiveArray::reinterpret\_cast [\#9129](https://github.com/apache/arrow-rs/pull/9129) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
-- \[Parquet\] perf: Create StructArrays directly rather than via `ArrayData` \(1% improvement\) [\#9120](https://github.com/apache/arrow-rs/pull/9120) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
-- Avoid clones in `make_array` for `StructArray` and `GenericByteViewArray` [\#9114](https://github.com/apache/arrow-rs/pull/9114) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
-- perf: optimize hex decoding in json \(1.8x faster in binary-heavy\) [\#9091](https://github.com/apache/arrow-rs/pull/9091) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Weijun-H](https://github.com/Weijun-H))
-- Speed up binary kernels \(30% faster `and` and `or`\), add `BooleanBuffer::from_bitwise_binary_op` [\#9090](https://github.com/apache/arrow-rs/pull/9090) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
-- perf: improve field indexing in JSON StructArrayDecoder \(1.7x speed up\) [\#9086](https://github.com/apache/arrow-rs/pull/9086) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Weijun-H](https://github.com/Weijun-H))
-- bench: added to row\_format benchmark conversion of 53 non-nested columns [\#9081](https://github.com/apache/arrow-rs/pull/9081) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton))
-- perf: improve calculating length performance for view byte array in row conversion [\#9080](https://github.com/apache/arrow-rs/pull/9080) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton))
-- perf: improve calculating length performance for nested arrays in row conversion [\#9079](https://github.com/apache/arrow-rs/pull/9079) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton))
-- perf: improve calculating length performance for `GenericByteArray` in row conversion [\#9078](https://github.com/apache/arrow-rs/pull/9078) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton))
+- Introduce `NullBuffer::try_from_unsliced` to simplify array construction [\#9385](https://github.com/apache/arrow-rs/issues/9385) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]
+- perf: Coalesce page fetches when RowSelection selects all rows [\#9578](https://github.com/apache/arrow-rs/pull/9578) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Dandandan](https://github.com/Dandandan))
+- Use chunks\_exact for has\_true/has\_false to enable compiler unrolling [\#9570](https://github.com/apache/arrow-rs/pull/9570) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([adriangb](https://github.com/adriangb))
+- pyarrow: Cache the imported classes to avoid importing them each time [\#9439](https://github.com/apache/arrow-rs/pull/9439) ([Tpt](https://github.com/Tpt))

 **Closed issues:**

-- BatchCoalescer::push\_batch panics on schema mismatch instead of returning error [\#9389](https://github.com/apache/arrow-rs/issues/9389)
-- Release arrow-rs / parquet Minor version `57.3.0` \(January 2026\) [\#9240](https://github.com/apache/arrow-rs/issues/9240)
-- \[Variant\] support `..` and `['fieldName']` syntax in the VariantPath parser [\#9050](https://github.com/apache/arrow-rs/issues/9050)
-- Support Float16 for create\_random\_array [\#9028](https://github.com/apache/arrow-rs/issues/9028)
+- Duplicate macro definition: `partially_shredded_variant_array_gen` [\#9492](https://github.com/apache/arrow-rs/issues/9492)
+- Enable `LargeList` / `ListView` / `LargeListView` for `VariantArray::try_new` [\#9455](https://github.com/apache/arrow-rs/issues/9455)
+- Support variables/expressions in record\_batch! macro [\#9245](https://github.com/apache/arrow-rs/issues/9245) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)]

 **Merged pull requests:**

-- Avoid allocating a `Vec` in `StructBuilder` [\#9428](https://github.com/apache/arrow-rs/pull/9428) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Fokko](https://github.com/Fokko))
-- fix: fixed trait functions clash get\_date\_time\_part\_extract\_fn \(\#8221\) [\#9424](https://github.com/apache/arrow-rs/pull/9424) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([esavier](https://github.com/esavier))
-- \[Minor\] Use per-predicate projection masks in arrow\_reader\_clickbench benchmark [\#9413](https://github.com/apache/arrow-rs/pull/9413) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Dandandan](https://github.com/Dandandan))
-- Fix `ArrowArrayStreamReader` for 0-columns record batch streams [\#9405](https://github.com/apache/arrow-rs/pull/9405) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jonded94](https://github.com/jonded94))
-- Use zstd::bulk API in IPC and Parquet with context reuse for compression and decompression [\#9400](https://github.com/apache/arrow-rs/pull/9400) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan))
-- Reproduce the issue of \#9370 in a minimal, end-to-end way [\#9399](https://github.com/apache/arrow-rs/pull/9399) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([jonded94](https://github.com/jonded94))
-- perf: optimize skipper for varint values used when projecting Avro record types [\#9397](https://github.com/apache/arrow-rs/pull/9397) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mzabaluev](https://github.com/mzabaluev))
-- fix: return error instead of panic on schema mismatch in BatchCoalescer::push\_batch [\#9390](https://github.com/apache/arrow-rs/pull/9390) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([bvolpato-dd](https://github.com/bvolpato-dd))
-- Minor: Add additional test coverage for WriterProperties::{max\_row\_group\_row\_count,max\_row\_group\_size} [\#9387](https://github.com/apache/arrow-rs/pull/9387) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
-- Moving invalid\_utf8 tests into a separate mod [\#9384](https://github.com/apache/arrow-rs/pull/9384) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([sdf-jkl](https://github.com/sdf-jkl))
-- Update sysinfo requirement from 0.37.1 to 0.38.1 [\#9383](https://github.com/apache/arrow-rs/pull/9383) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([dependabot[bot]](https://github.com/apps/dependabot))
-- feat: support RunEndEncoded arrays in arrow-json reader and writer [\#9379](https://github.com/apache/arrow-rs/pull/9379) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Abhisheklearn12](https://github.com/Abhisheklearn12))
-- Remove lint issues in parquet-related code. [\#9375](https://github.com/apache/arrow-rs/pull/9375) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([brunal](https://github.com/brunal))
-- Add RunEndEncoded array comparator [\#9368](https://github.com/apache/arrow-rs/pull/9368) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([codephage2020](https://github.com/codephage2020))
-- feat: support BinaryView in bit\_length kernel [\#9363](https://github.com/apache/arrow-rs/pull/9363) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Abhisheklearn12](https://github.com/Abhisheklearn12))
-- Add regression tests for Parquet large binary offset overflow [\#9361](https://github.com/apache/arrow-rs/pull/9361) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([vigneshsiva11](https://github.com/vigneshsiva11))
-- feat: add max\_row\_group\_bytes option to WriterProperties [\#9357](https://github.com/apache/arrow-rs/pull/9357) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([yonipeleg33](https://github.com/yonipeleg33))
-- doc: remove disclaimer about `ListView` not being fully supported [\#9356](https://github.com/apache/arrow-rs/pull/9356) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jefffrey](https://github.com/Jefffrey))
-- Move row\_filter async tests from parquet async reader [\#9355](https://github.com/apache/arrow-rs/pull/9355) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([sdf-jkl](https://github.com/sdf-jkl))
-- \[Parquet\] Allow setting page size per column [\#9353](https://github.com/apache/arrow-rs/pull/9353) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([XiangpengHao](https://github.com/XiangpengHao))
-- feat: Support roundtrip ListView in parquet arrow writer [\#9352](https://github.com/apache/arrow-rs/pull/9352) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([codephage2020](https://github.com/codephage2020))
-- feat: add ListView and LargeListView support to arrow-ord [\#9347](https://github.com/apache/arrow-rs/pull/9347) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([codephage2020](https://github.com/codephage2020))
-- Support ListView in length kernel [\#9346](https://github.com/apache/arrow-rs/pull/9346) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([vegarsti](https://github.com/vegarsti))
-- feat: Add from\_datetime method to Timestamp types [\#9345](https://github.com/apache/arrow-rs/pull/9345) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([codephage2020](https://github.com/codephage2020))
-- \[main\] Update version to 57.3.0, add changelog [\#9334](https://github.com/apache/arrow-rs/pull/9334) ([alamb](https://github.com/alamb))
-- build\(deps\): update pyo3 requirement from 0.27.1 to 0.28.0 [\#9331](https://github.com/apache/arrow-rs/pull/9331) ([dependabot[bot]](https://github.com/apps/dependabot))
-- Add `DataType::is_list` and `DataType::is_binary` [\#9327](https://github.com/apache/arrow-rs/pull/9327) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([AdamGS](https://github.com/AdamGS))
-- Fix string array equality when the values buffer is the same and only the offsets to access it differ [\#9325](https://github.com/apache/arrow-rs/pull/9325) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann))
-- perf: skip validation of dictionary keys if all null [\#9322](https://github.com/apache/arrow-rs/pull/9322) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([albertlockett](https://github.com/albertlockett))
-- parquet: use rwlock instead of mutex in predicate cache [\#9319](https://github.com/apache/arrow-rs/pull/9319) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([lyang24](https://github.com/lyang24))
-- nit: remove usused code [\#9318](https://github.com/apache/arrow-rs/pull/9318) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([lyang24](https://github.com/lyang24))
-- Remove unnecessary Arc\ [\#9316](https://github.com/apache/arrow-rs/pull/9316) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([scovich](https://github.com/scovich))
-- Optimize data page statistics conversion \(up to 4x\) [\#9303](https://github.com/apache/arrow-rs/pull/9303) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan))
-- \[regression\] Error with adaptive predicate pushdown: "Invalid offset in sparse column chunk data: 754, no matching page found." [\#9301](https://github.com/apache/arrow-rs/pull/9301) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([sdf-jkl](https://github.com/sdf-jkl))
-- Improve `PrimitiveArray::from_iter` perf [\#9294](https://github.com/apache/arrow-rs/pull/9294) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan))
-- Add additional Arrow type support [\#9291](https://github.com/apache/arrow-rs/pull/9291) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838))
-- fix: ensure `BufferBuilder::truncate` doesn't overset length [\#9288](https://github.com/apache/arrow-rs/pull/9288) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jefffrey](https://github.com/Jefffrey))
-- Add benchmark for row group index reader perf [\#9285](https://github.com/apache/arrow-rs/pull/9285) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([friendlymatthew](https://github.com/friendlymatthew))
-- fix union array row converter to handle non-sequential type ids [\#9283](https://github.com/apache/arrow-rs/pull/9283) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([friendlymatthew](https://github.com/friendlymatthew))
-- parquet: reduce clone in delta byte array decoder [\#9282](https://github.com/apache/arrow-rs/pull/9282) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([lyang24](https://github.com/lyang24))
-- fix: fix \[\[NULL\]\] array doesn't roundtrip in arrow-row bug [\#9275](https://github.com/apache/arrow-rs/pull/9275) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([lichuang](https://github.com/lichuang))
-- Enhance list casting, adding more cases for list views [\#9274](https://github.com/apache/arrow-rs/pull/9274) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jefffrey](https://github.com/Jefffrey))
-- \[Variant\] Add path index access tests for list [\#9273](https://github.com/apache/arrow-rs/pull/9273) ([liamzwbao](https://github.com/liamzwbao))
-- Factor out json reader's static make\_decoder args to a struct [\#9271](https://github.com/apache/arrow-rs/pull/9271) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([scovich](https://github.com/scovich))
-- make\_decoder accepts borrowed DataType instead of owned [\#9270](https://github.com/apache/arrow-rs/pull/9270) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([scovich](https://github.com/scovich))
-- Implement a more generic from\_nested\_iter method for list arrays [\#9268](https://github.com/apache/arrow-rs/pull/9268) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jhorstmann](https://github.com/jhorstmann))
-- Move extension type construction logic out of Field [\#9266](https://github.com/apache/arrow-rs/pull/9266) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([scovich](https://github.com/scovich))
-- fix: support casting string to f16 [\#9262](https://github.com/apache/arrow-rs/pull/9262) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jefffrey](https://github.com/Jefffrey))
-- Add additional coverage for StringViewArray comparisons [\#9257](https://github.com/apache/arrow-rs/pull/9257) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb))
-- Upgrade to object store 0.13.1 [\#9256](https://github.com/apache/arrow-rs/pull/9256) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Dandandan](https://github.com/Dandandan))
-- \[Parquet\] test adaptive predicate pushdown with skipped page [\#9251](https://github.com/apache/arrow-rs/pull/9251) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([sdf-jkl](https://github.com/sdf-jkl))
-- Speed up string view comparison \(up to 3x\) [\#9250](https://github.com/apache/arrow-rs/pull/9250) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan))
-- Add bench for LocalFileSystem [\#9248](https://github.com/apache/arrow-rs/pull/9248) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Dandandan](https://github.com/Dandandan))
-- \[Parquet\] Add test for reading/writing long UTF8 StringViews [\#9246](https://github.com/apache/arrow-rs/pull/9246) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb))
-- \[Parquet\] test adaptive predicate pushdown with skipped page [\#9243](https://github.com/apache/arrow-rs/pull/9243) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([erratic-pattern](https://github.com/erratic-pattern))
-- Add tests and fixes for schema resolution bug [\#9237](https://github.com/apache/arrow-rs/pull/9237) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838))
-- Revert "Seal Array trait \(\#9092\)", mark `Array` as `unsafe` [\#9234](https://github.com/apache/arrow-rs/pull/9234) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([gabotechs](https://github.com/gabotechs))
-- Speedup filter \(up to ~1.5x\) `FilterBuilder::Optimize`/`BitIndexIterator`/`iter_set_bits_rev` [\#9229](https://github.com/apache/arrow-rs/pull/9229) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan))
-- optimize `RowGroupIndexReader` for single row group reads [\#9226](https://github.com/apache/arrow-rs/pull/9226) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([friendlymatthew](https://github.com/friendlymatthew))
-- test: improve arrow-row fuzz tests [\#9222](https://github.com/apache/arrow-rs/pull/9222) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton))
-- arrow-cast: support packing to Dictionary\(\_, Utf8View/BinaryView\) [\#9220](https://github.com/apache/arrow-rs/pull/9220)
[[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([ethan-tyler](https://github.com/ethan-tyler)) -- Add additional test coverage for `BatchCoalescer` push\_batch\_with\_filter [\#9218](https://github.com/apache/arrow-rs/pull/9218) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- \[Parquet\] Optimize appending max level comparison in DefinitionLevelDecoder [\#9217](https://github.com/apache/arrow-rs/pull/9217) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([jhorstmann](https://github.com/jhorstmann)) -- Remove dead code to fix clippy failure on main [\#9215](https://github.com/apache/arrow-rs/pull/9215) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- \[Parquet\] perf: reuse seeked File clone in ChunkReader::get\_read\(\) [\#9214](https://github.com/apache/arrow-rs/pull/9214) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([fvaleye](https://github.com/fvaleye)) -- fix: \[9018\]Fixed RunArray slice offsets\(row, cast, eq\) [\#9213](https://github.com/apache/arrow-rs/pull/9213) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([manishkr](https://github.com/manishkr)) -- Add benchmarks for reading struct arrays from parquet [\#9210](https://github.com/apache/arrow-rs/pull/9210) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([jhorstmann](https://github.com/jhorstmann)) -- Support casting negative scale decimals to numeric [\#9207](https://github.com/apache/arrow-rs/pull/9207) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Chiicake](https://github.com/Chiicake)) -- Deprecate `ArrowReaderOptions::with_page_index` and update API [\#9199](https://github.com/apache/arrow-rs/pull/9199) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([friendlymatthew](https://github.com/friendlymatthew)) -- arrow-ipc: add reset method to DictionaryTracker [\#9196](https://github.com/apache/arrow-rs/pull/9196) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([albertlockett](https://github.com/albertlockett)) -- Avoid a clone when creating `ListArray` from ArrayData [\#9194](https://github.com/apache/arrow-rs/pull/9194) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- Avoid a clone when creating `ListViewArray` from ArrayData [\#9193](https://github.com/apache/arrow-rs/pull/9193) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- Avoid a clone when creating `MapArray` from ArrayData [\#9192](https://github.com/apache/arrow-rs/pull/9192) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- Avoid a clone when creating `NullArray` from ArrayData [\#9191](https://github.com/apache/arrow-rs/pull/9191) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- Avoid a clone when creating `PrimitiveArray` from ArrayData [\#9190](https://github.com/apache/arrow-rs/pull/9190) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- Avoid a clone when creating `RunEndArray` from ArrayData [\#9189](https://github.com/apache/arrow-rs/pull/9189) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- Avoid a clone when creating `UnionArray` from ArrayData [\#9188](https://github.com/apache/arrow-rs/pull/9188) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] 
([alamb](https://github.com/alamb)) -- Avoid a clone when creating `FixedSizeListArray` from ArrayData [\#9187](https://github.com/apache/arrow-rs/pull/9187) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- Avoid a clone when creating `FixedSizeBinaryArray` from ArrayData [\#9186](https://github.com/apache/arrow-rs/pull/9186) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- Avoid a clone when creating `DictionaryArray` from ArrayData [\#9185](https://github.com/apache/arrow-rs/pull/9185) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- fix: take\_run return empty array instead of panic. [\#9182](https://github.com/apache/arrow-rs/pull/9182) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([thorfour](https://github.com/thorfour)) -- lint: remove unused function \(fix clippy [\#9178](https://github.com/apache/arrow-rs/pull/9178) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton)) -- add `#[inline]` to `BitIterator` `next` function [\#9177](https://github.com/apache/arrow-rs/pull/9177) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton)) -- Add ListView support to `arrow-row` and `arrow-ord` [\#9176](https://github.com/apache/arrow-rs/pull/9176) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([brancz](https://github.com/brancz)) -- arrow-cast: Add display formatter for ListView [\#9175](https://github.com/apache/arrow-rs/pull/9175) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([brancz](https://github.com/brancz)) -- Add BinaryFormatSupport and Row Encoder to `arrow-avro` Writer [\#9171](https://github.com/apache/arrow-rs/pull/9171) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838)) -- docs\(parquet\): move async parquet example into ArrowReaderBuilder docs [\#9167](https://github.com/apache/arrow-rs/pull/9167) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([vigneshsiva11](https://github.com/vigneshsiva11)) -- feat\(array\): add `RecordBatchStream` trait [\#9166](https://github.com/apache/arrow-rs/pull/9166) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([lidavidm](https://github.com/lidavidm)) -- refactor: streamline date64 tests [\#9165](https://github.com/apache/arrow-rs/pull/9165) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([cht42](https://github.com/cht42)) -- docs: update examples in ArrowReaderOptions to use in-memory buffers [\#9163](https://github.com/apache/arrow-rs/pull/9163) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([AndreaBozzo](https://github.com/AndreaBozzo)) -- Add Avro Reader projection API [\#9162](https://github.com/apache/arrow-rs/pull/9162) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([jecsand838](https://github.com/jecsand838)) -- Avoid a clone when creating StringArray/BinaryArray from ArrayData [\#9160](https://github.com/apache/arrow-rs/pull/9160) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- fix missing utf8 check for conversion from BinaryViewArray to StringViewArray [\#9158](https://github.com/apache/arrow-rs/pull/9158) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- Minor: try and avoid an allocation creating `GenericByteViewArray` from `ArrayData` 
[\#9156](https://github.com/apache/arrow-rs/pull/9156) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- Add find\_nth\_set\_bit\_position [\#9151](https://github.com/apache/arrow-rs/pull/9151) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan)) -- feat: add null comparison handling in make\_comparator [\#9150](https://github.com/apache/arrow-rs/pull/9150) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Weijun-H](https://github.com/Weijun-H)) -- Uncomment part of test\_utf8\_single\_column\_reader\_test [\#9148](https://github.com/apache/arrow-rs/pull/9148) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([sdf-jkl](https://github.com/sdf-jkl)) -- arrow-ipc: Add tests for nested dicts for Map and Union arrays [\#9146](https://github.com/apache/arrow-rs/pull/9146) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([brancz](https://github.com/brancz)) -- Update ASF copyright year in NOTICE [\#9145](https://github.com/apache/arrow-rs/pull/9145) ([mohit7705](https://github.com/mohit7705)) -- Avoid panic on Date32 overflow [\#9144](https://github.com/apache/arrow-rs/pull/9144) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([cht42](https://github.com/cht42)) -- feat: add `reserve` to `Rows` [\#9142](https://github.com/apache/arrow-rs/pull/9142) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton)) -- docs\(variant\): fix VariantObject::get documentation to reflect Option return type [\#9139](https://github.com/apache/arrow-rs/pull/9139) ([mohit7705](https://github.com/mohit7705)) -- Add `BooleanBufferBuilder::extend_trusted_len` [\#9137](https://github.com/apache/arrow-rs/pull/9137) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan)) -- fix: support cast from `Null` to list view/run encoded/union types [\#9134](https://github.com/apache/arrow-rs/pull/9134) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jefffrey](https://github.com/Jefffrey)) -- Fix clippy [\#9130](https://github.com/apache/arrow-rs/pull/9130) ([alamb](https://github.com/alamb)) -- Fix IPC roundtripping dicts nested in ListViews [\#9126](https://github.com/apache/arrow-rs/pull/9126) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([brancz](https://github.com/brancz)) -- Update readme for geospatial crate [\#9124](https://github.com/apache/arrow-rs/pull/9124) ([paleolimbot](https://github.com/paleolimbot)) -- \[Parquet\] perf: Create `PrimitiveArray`s directly rather than via `ArrayData` [\#9122](https://github.com/apache/arrow-rs/pull/9122) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb)) -- \[Parquet\] perf: Create Utf8/BinaryViewArray directly rather than via `ArrayData` [\#9121](https://github.com/apache/arrow-rs/pull/9121) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([alamb](https://github.com/alamb)) -- \[parquet\] Add row group index virtual column [\#9117](https://github.com/apache/arrow-rs/pull/9117) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([friendlymatthew](https://github.com/friendlymatthew)) -- docs\(parquet\): add example for preserving dictionary encoding [\#9116](https://github.com/apache/arrow-rs/pull/9116) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([AndreaBozzo](https://github.com/AndreaBozzo)) -- doc: add example of RowFilter usage 
[\#9115](https://github.com/apache/arrow-rs/pull/9115) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([sonhmai](https://github.com/sonhmai)) -- docs: Update release schedule in README.md [\#9111](https://github.com/apache/arrow-rs/pull/9111) ([alamb](https://github.com/alamb)) -- feat: add benchmarks for json parser [\#9107](https://github.com/apache/arrow-rs/pull/9107) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Weijun-H](https://github.com/Weijun-H)) -- chore: switch test from `bincode` to maintained `postcard` crate \(RUSTSEC-2025-0141 \) [\#9104](https://github.com/apache/arrow-rs/pull/9104) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- Add nullif\_kernel benchmark [\#9089](https://github.com/apache/arrow-rs/pull/9089) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([alamb](https://github.com/alamb)) -- \[Variant\] Support Shredded Lists/Array in `variant_get` [\#9049](https://github.com/apache/arrow-rs/pull/9049) ([liamzwbao](https://github.com/liamzwbao)) -- fix:\[9018\]Fixed RunArray slice offsets [\#9036](https://github.com/apache/arrow-rs/pull/9036) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([manishkr](https://github.com/manishkr)) -- Support Float16 for create\_random\_array [\#9029](https://github.com/apache/arrow-rs/pull/9029) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([niebayes](https://github.com/niebayes)) -- fix: display `0 secs` for empty DayTime/MonthDayNano intervals [\#9023](https://github.com/apache/arrow-rs/pull/9023) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Jefffrey](https://github.com/Jefffrey)) -- Add options to skip decoding `Statistics` and `SizeStatistics` in Parquet metadata [\#9008](https://github.com/apache/arrow-rs/pull/9008) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl)) +- \[Variant\] Add unshred\_variant support for Binary and LargeBinary types [\#9576](https://github.com/apache/arrow-rs/pull/9576) ([kunalsinghdadhwal](https://github.com/kunalsinghdadhwal)) +- \[Variant\] Add `variant_to_arrow` `Struct` type support [\#9572](https://github.com/apache/arrow-rs/pull/9572) ([sdf-jkl](https://github.com/sdf-jkl)) +- Make Sbbf Constructers Public [\#9569](https://github.com/apache/arrow-rs/pull/9569) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([cetra3](https://github.com/cetra3)) +- fix: Used `checked_add` for bounds checks to avoid UB [\#9568](https://github.com/apache/arrow-rs/pull/9568) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([etseidl](https://github.com/etseidl)) +- Add mutable operations to BooleanBuffer \(Bit\*Assign\) [\#9567](https://github.com/apache/arrow-rs/pull/9567) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan)) +- chore\(deps\): update lz4\_flex requirement from 0.12 to 0.13 [\#9565](https://github.com/apache/arrow-rs/pull/9565) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([dependabot[bot]](https://github.com/apps/dependabot)) +- arrow-select: fix MutableArrayData interleave for ListView [\#9560](https://github.com/apache/arrow-rs/pull/9560) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([asubiotto](https://github.com/asubiotto)) +- Move `ValueIter` into own module, and add public `record_count` function [\#9557](https://github.com/apache/arrow-rs/pull/9557) 
[[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Rafferty97](https://github.com/Rafferty97)) +- arrow-flight: generate dict\_ids for dicts nested inside complex types [\#9556](https://github.com/apache/arrow-rs/pull/9556) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] [[arrow-flight](https://github.com/apache/arrow-rs/labels/arrow-flight)] ([asubiotto](https://github.com/asubiotto)) +- add `shred_variant` support for `LargeUtf8` and `LargeBinary` [\#9554](https://github.com/apache/arrow-rs/pull/9554) ([sdf-jkl](https://github.com/sdf-jkl)) +- \[minor\] Download clickbench file when missing [\#9553](https://github.com/apache/arrow-rs/pull/9553) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Dandandan](https://github.com/Dandandan)) +- DeltaBitPackEncoderConversion: Fix panic message on invalid type [\#9552](https://github.com/apache/arrow-rs/pull/9552) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([progval](https://github.com/progval)) +- Replace interleave overflow panic with error [\#9549](https://github.com/apache/arrow-rs/pull/9549) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([xudong963](https://github.com/xudong963)) +- feat\(arrow-avro\): `HeaderInfo` to expose OCF header [\#9548](https://github.com/apache/arrow-rs/pull/9548) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mzabaluev](https://github.com/mzabaluev)) +- chore: Protect `main` branch with required reviews [\#9547](https://github.com/apache/arrow-rs/pull/9547) ([comphead](https://github.com/comphead)) +- Add benchmark for `infer_json_schema` [\#9546](https://github.com/apache/arrow-rs/pull/9546) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Rafferty97](https://github.com/Rafferty97)) +- chore\(deps\): bump black from 24.3.0 to 26.3.1 in /parquet/pytest [\#9545](https://github.com/apache/arrow-rs/pull/9545) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([dependabot[bot]](https://github.com/apps/dependabot)) +- Unroll interleave -25-30% [\#9542](https://github.com/apache/arrow-rs/pull/9542) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Dandandan](https://github.com/Dandandan)) +- Optimize `take_fixed_size_binary` For Predefined Value Lengths [\#9535](https://github.com/apache/arrow-rs/pull/9535) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([tobixdev](https://github.com/tobixdev)) +- feat: expose arrow schema on async avro reader [\#9534](https://github.com/apache/arrow-rs/pull/9534) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mzabaluev](https://github.com/mzabaluev)) +- Make with\_file\_decryption\_properties pub instead of pub\(crate\) [\#9532](https://github.com/apache/arrow-rs/pull/9532) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([Dandandan](https://github.com/Dandandan)) +- fix: handle Null type in try\_merge for Struct, List, LargeList, and Union [\#9524](https://github.com/apache/arrow-rs/pull/9524) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([zhuqi-lucas](https://github.com/zhuqi-lucas)) +- chore: extend record\_batch macro to support variables and expressions [\#9522](https://github.com/apache/arrow-rs/pull/9522) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([buraksenn](https://github.com/buraksenn)) +- \[Variant\] clean up `variant_get` tests [\#9518](https://github.com/apache/arrow-rs/pull/9518) ([sdf-jkl](https://github.com/sdf-jkl)) +- support large string for unshred variant 
[\#9515](https://github.com/apache/arrow-rs/pull/9515) ([friendlymatthew](https://github.com/friendlymatthew)) +- support string view unshred variant [\#9514](https://github.com/apache/arrow-rs/pull/9514) ([friendlymatthew](https://github.com/friendlymatthew)) +- Add has\_true\(\) and has\_false\(\) to BooleanArray [\#9511](https://github.com/apache/arrow-rs/pull/9511) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([adriangb](https://github.com/adriangb)) +- Fix Invalid offset in sparse column chunk data error for multiple predicates [\#9509](https://github.com/apache/arrow-rs/pull/9509) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([cetra3](https://github.com/cetra3)) +- fix: remove incorrect debug assertion in BatchCoalescer [\#9508](https://github.com/apache/arrow-rs/pull/9508) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Tim-53](https://github.com/Tim-53)) +- \[Json\] Add benchmarks for list json reader [\#9507](https://github.com/apache/arrow-rs/pull/9507) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([liamzwbao](https://github.com/liamzwbao)) +- fix: first next\_back\(\) on new RowsIter panics [\#9505](https://github.com/apache/arrow-rs/pull/9505) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton)) +- Add some benchmarks for decoding delta encoded Parquet [\#9500](https://github.com/apache/arrow-rs/pull/9500) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl)) +- chore: remove duplicate macro `partially_shredded_variant_array_gen` [\#9498](https://github.com/apache/arrow-rs/pull/9498) ([codephage2020](https://github.com/codephage2020)) +- Deprecate ArrowTimestampType::make\_value in favor of from\_naive\_datetime [\#9491](https://github.com/apache/arrow-rs/pull/9491) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([codephage2020](https://github.com/codephage2020)) +- fix: Do not assume missing nullcount stat means zero nullcount [\#9481](https://github.com/apache/arrow-rs/pull/9481) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([scovich](https://github.com/scovich)) +- \[Variant\] Enahcne bracket access for VariantPath [\#9479](https://github.com/apache/arrow-rs/pull/9479) ([klion26](https://github.com/klion26)) +- Optimize delta binary decoder in the case where bitwidth=0 [\#9477](https://github.com/apache/arrow-rs/pull/9477) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([etseidl](https://github.com/etseidl)) +- Add PrimitiveRunBuilder::with\_data\_type\(\) to customize the values' DataType [\#9473](https://github.com/apache/arrow-rs/pull/9473) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([brunal](https://github.com/brunal)) +- Convert `prettyprint` tests in `arrow-cast` to `insta` inline snapshots [\#9472](https://github.com/apache/arrow-rs/pull/9472) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([grtlr](https://github.com/grtlr)) +- Update strum\_macros requirement from 0.27 to 0.28 [\#9471](https://github.com/apache/arrow-rs/pull/9471) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([dependabot[bot]](https://github.com/apps/dependabot)) +- docs\(parquet\): Fix broken links in README [\#9467](https://github.com/apache/arrow-rs/pull/9467) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([SYaoJun](https://github.com/SYaoJun)) +- Add list-like types support to 
VariantArray::try\_new [\#9457](https://github.com/apache/arrow-rs/pull/9457) ([sdf-jkl](https://github.com/sdf-jkl)) +- Simplify downcast\_...!\(\) macro definitions [\#9454](https://github.com/apache/arrow-rs/pull/9454) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([brunal](https://github.com/brunal)) +- feat\(parquet\): add content defined chunking for arrow writer [\#9450](https://github.com/apache/arrow-rs/pull/9450) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([kszucs](https://github.com/kszucs)) +- refactor: simplify iterator using cloned\(\).map\(Some\) [\#9449](https://github.com/apache/arrow-rs/pull/9449) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([SYaoJun](https://github.com/SYaoJun)) +- feat: Optimize from\_bitwise\_binary\_op with 64-bit alignment [\#9441](https://github.com/apache/arrow-rs/pull/9441) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([kunalsinghdadhwal](https://github.com/kunalsinghdadhwal)) +- docs: fix markdown link syntax in README [\#9440](https://github.com/apache/arrow-rs/pull/9440) ([SYaoJun](https://github.com/SYaoJun)) +- Move `ListLikeArray` to arrow-array to be shared with json writer and parquet unshredding [\#9437](https://github.com/apache/arrow-rs/pull/9437) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([liamzwbao](https://github.com/liamzwbao)) +- Add `claim` method to recordbatch for memory accounting [\#9433](https://github.com/apache/arrow-rs/pull/9433) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([cetra3](https://github.com/cetra3)) +- Add `append_nulls` to `MapBuilder` [\#9432](https://github.com/apache/arrow-rs/pull/9432) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Fokko](https://github.com/Fokko)) +- Add `append_non_nulls` to `StructBuilder` [\#9430](https://github.com/apache/arrow-rs/pull/9430) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Fokko](https://github.com/Fokko)) +- Add `append_value_n` to GenericByteBuilder [\#9426](https://github.com/apache/arrow-rs/pull/9426) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Fokko](https://github.com/Fokko)) +- refactor: simplify dynamic state for Avro record projection [\#9419](https://github.com/apache/arrow-rs/pull/9419) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mzabaluev](https://github.com/mzabaluev)) +- Add `NullBuffer::from_unsliced_buffer` helper and refactor call sites [\#9411](https://github.com/apache/arrow-rs/pull/9411) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([Eyad3skr](https://github.com/Eyad3skr)) +- Implement min, max, sum for run-end-encoded arrays. 
[\#9409](https://github.com/apache/arrow-rs/pull/9409) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([brunal](https://github.com/brunal)) +- feat: add `RunArray::new_unchecked` and `RunArray::into_parts` [\#9376](https://github.com/apache/arrow-rs/pull/9376) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([rluvaton](https://github.com/rluvaton)) +- Fix skip\_records over-counting when partial record precedes num\_rows page skip [\#9374](https://github.com/apache/arrow-rs/pull/9374) [[parquet](https://github.com/apache/arrow-rs/labels/parquet)] ([jonded94](https://github.com/jonded94)) +- fix: resolution of complex type variants in Avro unions [\#9328](https://github.com/apache/arrow-rs/pull/9328) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mzabaluev](https://github.com/mzabaluev)) +- feat\(arrow-avro\): Configurable Arrow timezone ID for Avro timestamps [\#9280](https://github.com/apache/arrow-rs/pull/9280) [[arrow](https://github.com/apache/arrow-rs/labels/arrow)] ([mzabaluev](https://github.com/mzabaluev)) diff --git a/Cargo.toml b/Cargo.toml index 8b51c01acab6..4ca4b068f65a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -68,7 +68,7 @@ exclude = [ ] [workspace.package] -version = "58.0.0" +version = "58.1.0" homepage = "https://github.com/apache/arrow-rs" repository = "https://github.com/apache/arrow-rs" authors = ["Apache Arrow "] @@ -85,33 +85,37 @@ edition = "2024" rust-version = "1.85" [workspace.dependencies] -arrow = { version = "58.0.0", path = "./arrow", default-features = false } -arrow-arith = { version = "58.0.0", path = "./arrow-arith" } -arrow-array = { version = "58.0.0", path = "./arrow-array" } -arrow-buffer = { version = "58.0.0", path = "./arrow-buffer" } -arrow-cast = { version = "58.0.0", path = "./arrow-cast" } -arrow-csv = { version = "58.0.0", path = "./arrow-csv" } -arrow-data = { version = "58.0.0", path = "./arrow-data" } -arrow-ipc = { version = "58.0.0", path = "./arrow-ipc" } -arrow-json = { version = "58.0.0", path = "./arrow-json" } -arrow-ord = { version = "58.0.0", path = "./arrow-ord" } -arrow-pyarrow = { version = "58.0.0", path = "./arrow-pyarrow" } -arrow-row = { version = "58.0.0", path = "./arrow-row" } -arrow-schema = { version = "58.0.0", path = "./arrow-schema" } -arrow-select = { version = "58.0.0", path = "./arrow-select" } -arrow-string = { version = "58.0.0", path = "./arrow-string" } -parquet = { version = "58.0.0", path = "./parquet", default-features = false } -parquet-geospatial = { version = "58.0.0", path = "./parquet-geospatial" } -parquet-variant = { version = "58.0.0", path = "./parquet-variant" } -parquet-variant-json = { version = "58.0.0", path = "./parquet-variant-json" } -parquet-variant-compute = { version = "58.0.0", path = "./parquet-variant-compute" } +arrow = { version = "58.1.0", path = "./arrow", default-features = false } +arrow-arith = { version = "58.1.0", path = "./arrow-arith" } +arrow-array = { version = "58.1.0", path = "./arrow-array" } +arrow-buffer = { version = "58.1.0", path = "./arrow-buffer" } +arrow-cast = { version = "58.1.0", path = "./arrow-cast" } +arrow-csv = { version = "58.1.0", path = "./arrow-csv" } +arrow-data = { version = "58.1.0", path = "./arrow-data" } +arrow-ipc = { version = "58.1.0", path = "./arrow-ipc" } +arrow-json = { version = "58.1.0", path = "./arrow-json" } +arrow-ord = { version = "58.1.0", path = "./arrow-ord" } +arrow-pyarrow = { version = "58.1.0", path = "./arrow-pyarrow" } +arrow-row = { version = "58.1.0", path = "./arrow-row" } 
+arrow-schema = { version = "58.1.0", path = "./arrow-schema" }
+arrow-select = { version = "58.1.0", path = "./arrow-select" }
+arrow-string = { version = "58.1.0", path = "./arrow-string" }
+parquet = { version = "58.1.0", path = "./parquet", default-features = false }
+parquet-geospatial = { version = "58.1.0", path = "./parquet-geospatial" }
+parquet-variant = { version = "58.1.0", path = "./parquet-variant" }
+parquet-variant-json = { version = "58.1.0", path = "./parquet-variant-json" }
+parquet-variant-compute = { version = "58.1.0", path = "./parquet-variant-compute" }
 chrono = { version = "0.4.40", default-features = false, features = ["clock"] }
-simdutf8 = { version = "0.1.5", default-features = false }
-
 criterion = { version = "0.8.0", default-features = false }
+insta = { version = "1.46.3", default-features = false }
+
+object_store = { version = "0.13.2", default-features = false }
+
+simdutf8 = { version = "0.1.5", default-features = false }
+
 # release inherited profile keeping debug information and symbols
 # for mem/cpu profiling
 [profile.profiling]
diff --git a/README.md b/README.md
index 27e0ca13c179..70f2f158e2f4 100644
--- a/README.md
+++ b/README.md
@@ -91,14 +91,10 @@ Planned Release Schedule
 
 | Approximate Date | Version    | Notes                                   |
 | ---------------- | ---------- | --------------------------------------- |
-| December 2025    | [`57.2.0`] | Minor, NO breaking API changes          |
-| January 2026     | [`58.0.0`] | Major, potentially breaking API changes |
-| February 2026    | [`58.1.0`] | Minor, NO breaking API changes          |
-| March 2026       | [`58.2.0`] | Minor, NO breaking API changes          |
-| April 2026       | [`59.0.0`] | Major, potentially breaking API changes |
-
-[`57.2.0`]: https://github.com/apache/arrow-rs/milestone/5
-[`58.0.0`]: https://github.com/apache/arrow-rs/milestone/6
+| March 2026       | [`58.1.0`] | Minor, NO breaking API changes          |
+| April 2026       | [`58.2.0`] | Minor, NO breaking API changes          |
+| May 2026         | [`59.0.0`] | Major, potentially breaking API changes |
+
 [`58.1.0`]: https://github.com/apache/arrow-rs/issues/9108
 [`58.2.0`]: https://github.com/apache/arrow-rs/issues/9109
 [`59.0.0`]: https://github.com/apache/arrow-rs/issues/9110
diff --git a/arrow-arith/src/aggregate.rs b/arrow-arith/src/aggregate.rs
index a043259694c1..59792d0c5b1d 100644
--- a/arrow-arith/src/aggregate.rs
+++ b/arrow-arith/src/aggregate.rs
@@ -540,7 +540,7 @@ pub fn min_string_view(array: &StringViewArray) -> Option<&str> {
 /// Returns the sum of values in the array.
 ///
 /// This doesn't detect overflow. Once overflowing, the result will wrap around.
-/// For an overflow-checking variant, use `sum_array_checked` instead.
+/// For an overflow-checking variant, use [`sum_array_checked`] instead.
 pub fn sum_array<T, A: ArrayAccessor<Item = T::Native>>(array: A) -> Option<T::Native>
 where
     T: ArrowNumericType,
@@ -567,6 +567,12 @@ where
             Some(sum)
         }
+        DataType::RunEndEncoded(run_ends, _) => match run_ends.data_type() {
+            DataType::Int16 => ree::sum_wrapping::<Int16Type, T>(&array),
+            DataType::Int32 => ree::sum_wrapping::<Int32Type, T>(&array),
+            DataType::Int64 => ree::sum_wrapping::<Int64Type, T>(&array),
+            _ => unreachable!(),
+        },
         _ => sum::<T>(as_primitive_array(&array)),
     }
 }
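The new `RunEndEncoded` arm above dispatches on the run-end width and hands the array to the `ree` module added further down. A minimal usage sketch (an editor's illustration mirroring the calling convention of the tests at the end of this file; the `TypedRunArray` returned by `downcast` satisfies the `ArrayAccessor` bound these kernels take):

```rust
use arrow_arith::aggregate::{max_array, min_array, sum_array};
use arrow_array::PrimitiveArray;
use arrow_array::builder::PrimitiveRunBuilder;
use arrow_array::types::{Int16Type, Int32Type};

// Run-end encode [10, 10, 20, 30, 30, 30]: three runs of lengths 2, 1, 3.
let mut builder = PrimitiveRunBuilder::<Int16Type, Int32Type>::new();
for v in [10, 10, 20, 30, 30, 30] {
    builder.append_value(v);
}
let run_array = builder.finish();

// The aggregate kernels take an `ArrayAccessor`, so downcast to a typed view first.
let typed = run_array.downcast::<PrimitiveArray<Int32Type>>().unwrap();
assert_eq!(sum_array::<Int32Type, _>(typed), Some(130));
assert_eq!(min_array::<Int32Type, _>(typed), Some(10));
assert_eq!(max_array::<Int32Type, _>(typed), Some(30));
```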
@@ -574,7 +580,9 @@
 /// Returns the sum of values in the array.
 ///
 /// This detects overflow and returns an `Err` for that. For a non-overflow-checking variant,
-/// use `sum_array` instead.
+/// use [`sum_array`] instead.
+/// Additionally returns an `Err` for run-end-encoded arrays whose values type
+/// does not match the provided type parameter.
 pub fn sum_array_checked<T, A: ArrayAccessor<Item = T::Native>>(
     array: A,
 ) -> Result<Option<T::Native>, ArrowError>
@@ -603,10 +611,110 @@ where
             Ok(Some(sum))
         }
+        DataType::RunEndEncoded(run_ends, _) => match run_ends.data_type() {
+            DataType::Int16 => ree::sum_checked::<Int16Type, T>(&array),
+            DataType::Int32 => ree::sum_checked::<Int32Type, T>(&array),
+            DataType::Int64 => ree::sum_checked::<Int64Type, T>(&array),
+            _ => unreachable!(),
+        },
         _ => sum_checked::<T>(as_primitive_array(&array)),
     }
 }
 
+// Logic for summing run-end-encoded arrays.
+mod ree {
+    use std::convert::Infallible;
+
+    use arrow_array::cast::AsArray;
+    use arrow_array::types::RunEndIndexType;
+    use arrow_array::{Array, ArrowNativeTypeOp, ArrowNumericType, PrimitiveArray, TypedRunArray};
+    use arrow_buffer::ArrowNativeType;
+    use arrow_schema::ArrowError;
+
+    /// Downcasts an array to a TypedRunArray.
+    fn downcast<'a, I: RunEndIndexType, V: ArrowNumericType>(
+        array: &'a dyn Array,
+    ) -> Option<TypedRunArray<'a, I, PrimitiveArray<V>>> {
+        let array = array.as_run_opt::<I>()?;
+        // We only support RunArray wrapping primitive types.
+        array.downcast::<PrimitiveArray<V>>()
+    }
+
+    /// Computes the sum (wrapping) of the array values.
+    pub(super) fn sum_wrapping<I: RunEndIndexType, V: ArrowNumericType>(
+        array: &dyn Array,
+    ) -> Option<V::Native> {
+        let ree = downcast::<I, V>(array)?;
+        // The closure is infallible, so the `let Ok(..)` pattern is irrefutable.
+        let Ok(sum) = fold(ree, |acc, val, len| -> Result<V::Native, Infallible> {
+            Ok(acc.add_wrapping(val.mul_wrapping(V::Native::usize_as(len))))
+        });
+        sum
+    }
+
+    /// Computes the sum (erroring on overflow) of the array values.
+    pub(super) fn sum_checked<I: RunEndIndexType, V: ArrowNumericType>(
+        array: &dyn Array,
+    ) -> Result<Option<V::Native>, ArrowError> {
+        let Some(ree) = downcast::<I, V>(array) else {
+            return Err(ArrowError::InvalidArgumentError(
+                "Input run array values are not a PrimitiveArray".to_string(),
+            ));
+        };
+        fold(ree, |acc, val, len| -> Result<V::Native, ArrowError> {
+            let Some(len) = V::Native::from_usize(len) else {
+                return Err(ArrowError::ArithmeticOverflow(format!(
+                    "Cannot convert a run-end index ({:?}) to the value type ({})",
+                    len,
+                    std::any::type_name::<V::Native>()
+                )));
+            };
+            acc.add_checked(val.mul_checked(len)?)
+        })
+    }
+
+    /// Folds over the values in a run-end-encoded array.
+    fn fold<'a, I: RunEndIndexType, V: ArrowNumericType, F, E>(
+        array: TypedRunArray<'a, I, PrimitiveArray<V>>,
+        mut f: F,
+    ) -> Result<Option<V::Native>, E>
+    where
+        F: FnMut(V::Native, V::Native, usize) -> Result<V::Native, E>,
+    {
+        let run_ends = array.run_ends();
+        let logical_start = run_ends.offset();
+        let logical_end = run_ends.offset() + run_ends.len();
+        let run_ends = run_ends.sliced_values();
+
+        let values_slice = array.run_array().values_slice();
+        let values = values_slice
+            .as_any()
+            .downcast_ref::<PrimitiveArray<V>>()
+            // Safety: we know the values array is PrimitiveArray.
+            .unwrap();
+
+        // Start counting at the logical window start so that elements before
+        // the slice contribute a run length of zero rather than being
+        // attributed to the first run's value.
+        let mut prev_end = logical_start;
+        let mut acc = V::Native::ZERO;
+        let mut has_non_null_value = false;
+
+        for (run_end, value) in run_ends.zip(values) {
+            let current_run_end = run_end.as_usize().clamp(logical_start, logical_end);
+            let run_length = current_run_end - prev_end;
+
+            if let Some(value) = value {
+                has_non_null_value = true;
+                acc = f(acc, value, run_length)?;
+            }
+
+            prev_end = current_run_end;
+            if current_run_end == logical_end {
+                break;
+            }
+        }
+
+        Ok(if has_non_null_value { Some(acc) } else { None })
+    }
+}
+
 /// Returns the min of values in the array of `ArrowNumericType` type, or dictionary
 /// array with value of `ArrowNumericType` type.
 pub fn min_array<T, A: ArrayAccessor<Item = T::Native>>(array: A) -> Option<T::Native>
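How `fold` windows a sliced array is easiest to see with concrete numbers. Below is an editor's standalone rendition of the clamp arithmetic (matching `test_ree_sum_array_sliced` further down); note that `prev_end` starts at the window start, not zero, so elements before the slice never count:

```rust
// Runs over [0, 10, 10, 10, 20, 30, 30, 30]: value 0 ends at 1, value 10
// ends at 4, value 20 ends at 5, value 30 ends at 8. After slice(2, 5)
// the logical window is [2, 7).
let (logical_start, logical_end) = (2usize, 7usize);
let runs = [(1usize, 0i64), (4, 10), (5, 20), (8, 30)];

let mut prev_end = logical_start; // pre-window elements contribute nothing
let mut sum = 0i64;
for (run_end, value) in runs {
    let end = run_end.clamp(logical_start, logical_end);
    sum += value * (end - prev_end) as i64; // run_length elements of `value`
    prev_end = end;
    if end == logical_end {
        break;
    }
}
assert_eq!(sum, 100); // 10*2 + 20*1 + 30*2
```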
@@ -639,6 +747,20 @@ where
 {
     match array.data_type() {
         DataType::Dictionary(_, _) => min_max_helper::<T, A, _>(array, cmp),
+        DataType::RunEndEncoded(run_ends, _) => {
+            // We can directly perform min/max on the values child array, as any
+            // run must have non-zero length.
+            let array: &dyn Array = &array;
+            let values = match run_ends.data_type() {
+                DataType::Int16 => array.as_run_opt::<Int16Type>()?.values_slice(),
+                DataType::Int32 => array.as_run_opt::<Int32Type>()?.values_slice(),
+                DataType::Int64 => array.as_run_opt::<Int64Type>()?.values_slice(),
+                _ => return None,
+            };
+            // We only support RunArray wrapping primitive types.
+            let values = values.as_any().downcast_ref::<PrimitiveArray<T>>()?;
+            m(values)
+        }
         _ => m(as_primitive_array(&array)),
     }
 }
@@ -751,7 +873,7 @@ pub fn bool_or(array: &BooleanArray) -> Option<bool> {
 /// Returns `Ok(None)` if the array is empty or only contains null values.
 ///
 /// This detects overflow and returns an `Err` for that. For a non-overflow-checking variant,
-/// use `sum` instead.
+/// use [`sum`] instead.
 pub fn sum_checked<T>(array: &PrimitiveArray<T>) -> Result<Option<T::Native>, ArrowError>
 where
     T: ArrowNumericType,
@@ -799,7 +921,7 @@
 /// Returns `None` if the array is empty or only contains null values.
 ///
 /// This doesn't detect overflow in release mode by default. Once overflowing, the result will
-/// wrap around. For an overflow-checking variant, use `sum_checked` instead.
+/// wrap around. For an overflow-checking variant, use [`sum_checked`] instead.
 pub fn sum<T>(array: &PrimitiveArray<T>) -> Option<T::Native>
 where
     T::Native: ArrowNativeTypeOp,
@@ -1750,4 +1872,170 @@ mod tests {
         sum_checked(&a).expect_err("overflow should be detected");
         sum_array_checked::<Int32Type, _>(&a).expect_err("overflow should be detected");
     }
+
+    /// Helper for building a RunArray.
+    fn make_run_array<'a, I: RunEndIndexType, V: ArrowNumericType, ItemType>(
+        values: impl IntoIterator<Item = &'a ItemType>,
+    ) -> RunArray<I>
+    where
+        ItemType: Clone + Into<Option<V::Native>> + 'static,
+    {
+        let mut builder = arrow_array::builder::PrimitiveRunBuilder::<I, V>::new();
+        for v in values.into_iter() {
+            builder.append_option((*v).clone().into());
+        }
+        builder.finish()
+    }
+
+    #[test]
+    fn test_ree_sum_array_basic() {
+        let run_array = make_run_array::<Int16Type, Int32Type, i32>(&[10, 10, 20, 30, 30, 30]);
+        let typed_array = run_array.downcast::<PrimitiveArray<Int32Type>>().unwrap();
+
+        let result = sum_array::<Int32Type, _>(typed_array);
+        assert_eq!(result, Some(130));
+
+        let result = sum_array_checked::<Int32Type, _>(typed_array).unwrap();
+        assert_eq!(result, Some(130));
+    }
+
+    #[test]
+    fn test_ree_sum_array_empty() {
+        let run_array = make_run_array::<Int16Type, Int32Type, i32>(&[]);
+        let typed_array = run_array.downcast::<PrimitiveArray<Int32Type>>().unwrap();
+
+        let result = sum_array::<Int32Type, _>(typed_array);
+        assert_eq!(result, None);
+
+        let result = sum_array_checked::<Int32Type, _>(typed_array).unwrap();
+        assert_eq!(result, None);
+    }
+
+    #[test]
+    fn test_ree_sum_array_with_nulls() {
+        let run_array = make_run_array::<Int16Type, Int32Type, Option<i32>>(&[
+            Some(10),
+            None,
+            Some(20),
+            None,
+            Some(30),
+        ]);
+        let typed_array = run_array.downcast::<PrimitiveArray<Int32Type>>().unwrap();
+
+        let result = sum_array::<Int32Type, _>(typed_array);
+        assert_eq!(result, Some(60));
+
+        let result = sum_array_checked::<Int32Type, _>(typed_array).unwrap();
+        assert_eq!(result, Some(60));
+    }
+
+    #[test]
+    fn test_ree_sum_array_with_only_nulls() {
+        let run_array =
+            make_run_array::<Int16Type, Int32Type, Option<i32>>(&[None, None, None, None, None]);
+        let typed_array = run_array.downcast::<PrimitiveArray<Int32Type>>().unwrap();
+
+        let result = sum_array::<Int32Type, _>(typed_array);
+        assert_eq!(result, None);
+
+        let result = sum_array_checked::<Int32Type, _>(typed_array).unwrap();
+        assert_eq!(result, None);
+    }
+
+    #[test]
+    fn test_ree_sum_array_overflow() {
+        let run_array = make_run_array::<Int16Type, Int8Type, i8>(&[126, 2]);
+        let typed_array = run_array.downcast::<PrimitiveArray<Int8Type>>().unwrap();
+
+        // i8 range is -128..=127. 126+2 overflows to -128.
+        let result = sum_array::<Int8Type, _>(typed_array);
+        assert_eq!(result, Some(-128));
+
+        let result = sum_array_checked::<Int8Type, _>(typed_array);
+        assert!(result.is_err());
+    }
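+
+    // (Editor's note: the next test slices [0, 10, 10, 10, 20, 30, 30, 30] with
+    // `slice(2, 5)`, keeping logical indices 2..7, i.e. [10, 10, 20, 30, 30],
+    // whose sum is 100; the kernels must ignore everything outside that window.)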
+
+    #[test]
+    fn test_ree_sum_array_sliced() {
+        let run_array =
+            make_run_array::<Int16Type, Int32Type, i32>(&[0, 10, 10, 10, 20, 30, 30, 30]);
+        // Skip 2 values at the start and 1 at the end.
+        let sliced = run_array.slice(2, 5);
+        let typed_array = sliced.downcast::<PrimitiveArray<Int32Type>>().unwrap();
+
+        let result = sum_array::<Int32Type, _>(typed_array);
+        assert_eq!(result, Some(100));
+
+        let result = sum_array_checked::<Int32Type, _>(typed_array).unwrap();
+        assert_eq!(result, Some(100));
+    }
+
+    #[test]
+    fn test_ree_min_max_array_basic() {
+        let run_array = make_run_array::<Int16Type, Int32Type, i32>(&[30, 30, 10, 20, 20]);
+        let typed_array = run_array.downcast::<PrimitiveArray<Int32Type>>().unwrap();
+
+        let result = min_array::<Int32Type, _>(typed_array);
+        assert_eq!(result, Some(10));
+
+        let result = max_array::<Int32Type, _>(typed_array);
+        assert_eq!(result, Some(30));
+    }
+
+    #[test]
+    fn test_ree_min_max_array_empty() {
+        let run_array = make_run_array::<Int16Type, Int32Type, i32>(&[]);
+        let typed_array = run_array.downcast::<PrimitiveArray<Int32Type>>().unwrap();
+
+        let result = min_array::<Int32Type, _>(typed_array);
+        assert_eq!(result, None);
+
+        let result = max_array::<Int32Type, _>(typed_array);
+        assert_eq!(result, None);
+    }
+
+    #[test]
+    fn test_ree_min_max_array_float() {
+        let run_array = make_run_array::<Int16Type, Float64Type, f64>(&[5.5, 5.5, 2.1, 8.9, 8.9]);
+        let typed_array = run_array.downcast::<PrimitiveArray<Float64Type>>().unwrap();
+
+        let result = min_array::<Float64Type, _>(typed_array);
+        assert_eq!(result, Some(2.1));
+
+        let result = max_array::<Float64Type, _>(typed_array);
+        assert_eq!(result, Some(8.9));
+    }
+
+    #[test]
+    fn test_ree_min_max_array_with_nulls() {
+        let run_array = make_run_array::<Int16Type, Int32Type, Option<i32>>(&[None, Some(10)]);
+        let typed_array = run_array.downcast::<PrimitiveArray<Int32Type>>().unwrap();
+
+        let result = min_array::<Int32Type, _>(typed_array);
+        assert_eq!(result, Some(10));
+
+        let result = max_array::<Int32Type, _>(typed_array);
+        assert_eq!(result, Some(10));
+    }
+
+    #[test]
+    fn test_ree_min_max_array_sliced() {
+        let run_array =
+            make_run_array::<Int16Type, Int32Type, i32>(&[0, 30, 30, 10, 20, 20, 100]);
+        // Skip 1 value at the start and 1 at the end.
+        let sliced = run_array.slice(1, 5);
+        let typed_array = sliced.downcast::<PrimitiveArray<Int32Type>>().unwrap();
+
+        let result = min_array::<Int32Type, _>(typed_array);
+        assert_eq!(result, Some(10));
+
+        let result = max_array::<Int32Type, _>(typed_array);
+        assert_eq!(result, Some(30));
+    }
+
+    #[test]
+    fn test_ree_min_max_array_sliced_mid_run() {
+        let run_array =
+            make_run_array::<Int16Type, Int32Type, i32>(&[0, 0, 30, 10, 20, 100, 100]);
+        // Skip 1 value at the start and 1 at the end; both cuts fall mid-run,
+        // so the first and last runs still contribute one element each.
+        let sliced = run_array.slice(1, 5);
+        let typed_array = sliced.downcast::<PrimitiveArray<Int32Type>>().unwrap();
+
+        let result = min_array::<Int32Type, _>(typed_array);
+        assert_eq!(result, Some(0));
+
+        let result = max_array::<Int32Type, _>(typed_array);
+        assert_eq!(result, Some(100));
+    }
 }
diff --git a/arrow-arith/src/numeric.rs b/arrow-arith/src/numeric.rs
index a57ba67544b7..f5a844ffd280 100644
--- a/arrow-arith/src/numeric.rs
+++ b/arrow-arith/src/numeric.rs
@@ -1320,7 +1320,10 @@ mod tests {
         "1960-01-30T04:23:20Z",
     ]
     .into_iter()
-    .map(|x| T::make_value(DateTime::parse_from_rfc3339(x).unwrap().naive_utc()).unwrap())
+    .map(|x| {
+        T::from_naive_datetime(DateTime::parse_from_rfc3339(x).unwrap().naive_utc(), None)
+            .unwrap()
+    })
     .collect();
     let a = PrimitiveArray::<T>::new(values, None);
diff --git a/arrow-array/Cargo.toml b/arrow-array/Cargo.toml
index a046fea2b0dc..da8ef98a1084 100644
--- a/arrow-array/Cargo.toml
+++ b/arrow-array/Cargo.toml
@@ -58,6 +58,8 @@ all-features = true
 async = ["dep:futures"]
 ffi = ["arrow-schema/ffi", "arrow-data/ffi"]
 force_validate = []
+# Enable memory tracking support
+pool = ["arrow-buffer/pool", "arrow-data/pool"]
 
 [dev-dependencies]
 rand = { version = "0.9", default-features = false, features = ["std", "std_rng", "thread_rng"] }
@@ -90,3 +92,7 @@ harness = false
 [[bench]]
 name = "record_batch"
 harness = false
+
+[[bench]]
+name = "boolean_array"
+harness = false
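The `numeric.rs` hunk above moves test code off the deprecated `ArrowTimestampType::make_value` onto `from_naive_datetime` (see #9491 in the changelog). An editor's sketch of the new call, mirroring the diff; the second argument is passed as `None` exactly as above, and the precise signature should be treated as assumed from that context:

```rust
use arrow_array::types::{ArrowTimestampType, TimestampMillisecondType};
use chrono::DateTime;

let ndt = DateTime::parse_from_rfc3339("1970-01-01T00:00:01Z")
    .unwrap()
    .naive_utc();
// One second past the epoch is 1_000 in millisecond units.
let millis = TimestampMillisecondType::from_naive_datetime(ndt, None).unwrap();
assert_eq!(millis, 1_000);
```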
diff --git a/arrow-array/benches/boolean_array.rs b/arrow-array/benches/boolean_array.rs
new file mode 100644
index 000000000000..03b601075bb8
--- /dev/null
+++ b/arrow-array/benches/boolean_array.rs
@@ -0,0 +1,77 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arrow_array::BooleanArray;
+use criterion::*;
+use std::hint;
+
+fn criterion_benchmark(c: &mut Criterion) {
+    for len in [64, 1024, 65536] {
+        // All true (no nulls)
+        let all_true = BooleanArray::from(vec![true; len]);
+        c.bench_function(&format!("true_count(all_true, {len})"), |b| {
+            b.iter(|| hint::black_box(&all_true).true_count());
+        });
+        c.bench_function(&format!("has_true(all_true, {len})"), |b| {
+            b.iter(|| hint::black_box(&all_true).has_true());
+        });
+        c.bench_function(&format!("has_false(all_true, {len})"), |b| {
+            b.iter(|| hint::black_box(&all_true).has_false());
+        });
+
+        // All false (no nulls)
+        let all_false = BooleanArray::from(vec![false; len]);
+        c.bench_function(&format!("true_count(all_false, {len})"), |b| {
+            b.iter(|| hint::black_box(&all_false).true_count());
+        });
+        c.bench_function(&format!("has_true(all_false, {len})"), |b| {
+            b.iter(|| hint::black_box(&all_false).has_true());
+        });
+        c.bench_function(&format!("has_false(all_false, {len})"), |b| {
+            b.iter(|| hint::black_box(&all_false).has_false());
+        });
+
+        // Mixed: first element differs (best-case short-circuit)
+        let mut mixed_early: Vec<bool> = vec![true; len];
+        mixed_early[0] = false;
+        let mixed_early = BooleanArray::from(mixed_early);
+        c.bench_function(&format!("true_count(mixed_early, {len})"), |b| {
+            b.iter(|| hint::black_box(&mixed_early).true_count());
+        });
+        c.bench_function(&format!("has_false(mixed_early, {len})"), |b| {
+            b.iter(|| hint::black_box(&mixed_early).has_false());
+        });
+
+        // With nulls: all valid values true
+        let with_nulls: Vec<Option<bool>> = (0..len)
+            .map(|i| if i % 10 == 0 { None } else { Some(true) })
+            .collect();
+        let with_nulls = BooleanArray::from(with_nulls);
+        c.bench_function(&format!("true_count(nulls_all_true, {len})"), |b| {
+            b.iter(|| hint::black_box(&with_nulls).true_count());
+        });
+        c.bench_function(&format!("has_true(nulls_all_true, {len})"), |b| {
+            b.iter(|| hint::black_box(&with_nulls).has_true());
+        });
+        c.bench_function(&format!("has_false(nulls_all_true, {len})"), |b| {
+            b.iter(|| hint::black_box(&with_nulls).has_false());
+        });
+    }
+}
+
+criterion_group!(benches, criterion_benchmark);
+criterion_main!(benches);
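Before the implementation itself, a quick illustration of the semantics the new predicates promise (an editor's sketch distilled from the unit tests at the bottom of this diff: nulls count as neither `true` nor `false`):

```rust
use arrow_array::BooleanArray;

let arr = BooleanArray::from(vec![Some(false), None, Some(false)]);
assert!(!arr.has_true());        // nulls are not counted as true
assert!(arr.has_false());        // can stop at the first definite false
assert_eq!(arr.true_count(), 0); // counting, by contrast, scans every chunk
```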
diff --git a/arrow-array/src/array/boolean_array.rs b/arrow-array/src/array/boolean_array.rs
index 79865b88fff6..ee3413e1833d 100644
--- a/arrow-array/src/array/boolean_array.rs
+++ b/arrow-array/src/array/boolean_array.rs
@@ -19,6 +19,7 @@ use crate::array::print_long_array;
 use crate::builder::BooleanBuilder;
 use crate::iterator::BooleanIter;
 use crate::{Array, ArrayAccessor, ArrayRef, Scalar};
+use arrow_buffer::bit_chunk_iterator::UnalignedBitChunk;
 use arrow_buffer::{BooleanBuffer, Buffer, MutableBuffer, NullBuffer, bit_util};
 use arrow_data::{ArrayData, ArrayDataBuilder};
 use arrow_schema::DataType;
@@ -156,7 +157,18 @@ impl BooleanArray {
         &self.values
     }
 
-    /// Returns the number of non null, true values within this array
+    /// Block size for chunked fold operations in [`Self::has_true`] and [`Self::has_false`].
+    /// Using `chunks_exact` with this size lets the compiler fully unroll the inner
+    /// fold (no inner branch/loop), enabling short-circuit exits every N chunks.
+    const CHUNK_FOLD_BLOCK_SIZE: usize = 16;
+
+    /// Returns an [`UnalignedBitChunk`] over this array's values.
+    fn unaligned_bit_chunks(&self) -> UnalignedBitChunk<'_> {
+        UnalignedBitChunk::new(self.values().values(), self.values().offset(), self.len())
+    }
+
+    /// Returns the number of non null, true values within this array.
+    /// If you only need to check whether at least one true value exists, consider
+    /// using `has_true()`, which can short-circuit and be more efficient.
     pub fn true_count(&self) -> usize {
         match self.nulls() {
             Some(nulls) => {
@@ -171,11 +183,83 @@
         }
     }
 
-    /// Returns the number of non null, false values within this array
+    /// Returns the number of non null, false values within this array.
+    /// If you only need to check whether at least one false value exists, consider
+    /// using `has_false()`, which can short-circuit and be more efficient.
     pub fn false_count(&self) -> usize {
         self.len() - self.null_count() - self.true_count()
     }
 
+    /// Returns whether there is at least one non-null `true` value in this array.
+    ///
+    /// This is more efficient than `true_count() > 0` because it can short-circuit
+    /// as soon as a `true` value is found, without counting all set bits.
+    ///
+    /// Null values are not counted as `true`. Returns `false` for empty arrays.
+    pub fn has_true(&self) -> bool {
+        match self.nulls() {
+            Some(nulls) => {
+                let null_chunks = nulls.inner().bit_chunks().iter_padded();
+                let value_chunks = self.values().bit_chunks().iter_padded();
+                // A true value must be valid (null bit set) with its value bit set.
+                null_chunks.zip(value_chunks).any(|(n, v)| (n & v) != 0)
+            }
+            None => {
+                let bit_chunks = self.unaligned_bit_chunks();
+                let chunks = bit_chunks.chunks();
+                let mut exact = chunks.chunks_exact(Self::CHUNK_FOLD_BLOCK_SIZE);
+                let found = bit_chunks.prefix().unwrap_or(0) != 0
+                    || exact.any(|block| block.iter().fold(0u64, |acc, &c| acc | c) != 0);
+                found
+                    || exact.remainder().iter().any(|&c| c != 0)
+                    || bit_chunks.suffix().unwrap_or(0) != 0
+            }
+        }
+    }
+
+    /// Returns whether there is at least one non-null `false` value in this array.
+    ///
+    /// This is more efficient than `false_count() > 0` because it can short-circuit
+    /// as soon as a `false` value is found, without counting all set bits.
+    ///
+    /// Null values are not counted as `false`. Returns `false` for empty arrays.
+    pub fn has_false(&self) -> bool {
+        match self.nulls() {
+            Some(nulls) => {
+                let null_chunks = nulls.inner().bit_chunks().iter_padded();
+                let value_chunks = self.values().bit_chunks().iter_padded();
+                // A false value must be valid (null bit set) with its value bit clear.
+                null_chunks.zip(value_chunks).any(|(n, v)| (n & !v) != 0)
+            }
+            None => {
+                let bit_chunks = self.unaligned_bit_chunks();
+                // UnalignedBitChunk zeros padding bits; fill them with 1s so
+                // they don't appear as false values.
+                let lead_mask = !((1u64 << bit_chunks.lead_padding()) - 1);
+                let trail_mask = if bit_chunks.trailing_padding() == 0 {
+                    u64::MAX
+                } else {
+                    (1u64 << (64 - bit_chunks.trailing_padding())) - 1
+                };
+                let (prefix_fill, suffix_fill) = match (bit_chunks.prefix(), bit_chunks.suffix()) {
+                    (Some(_), Some(_)) => (!lead_mask, !trail_mask),
+                    (Some(_), None) => (!lead_mask | !trail_mask, 0),
+                    (None, Some(_)) => (0, !trail_mask),
+                    (None, None) => (0, 0),
+                };
+                let chunks = bit_chunks.chunks();
+                let mut exact = chunks.chunks_exact(Self::CHUNK_FOLD_BLOCK_SIZE);
+                let found = bit_chunks
+                    .prefix()
+                    .is_some_and(|v| (v | prefix_fill) != u64::MAX)
+                    || exact
+                        .any(|block| block.iter().fold(u64::MAX, |acc, &c| acc & c) != u64::MAX);
+                found
+                    || exact.remainder().iter().any(|&c| c != u64::MAX)
+                    || bit_chunks
+                        .suffix()
+                        .is_some_and(|v| (v | suffix_fill) != u64::MAX)
+            }
+        }
+    }
+
     /// Returns the boolean value at index `i`.
/// /// Note: This method does not check for nulls and the value is arbitrary @@ -346,6 +430,14 @@ unsafe impl Array for BooleanArray { fn get_array_memory_size(&self) -> usize { std::mem::size_of::() + self.get_buffer_memory_size() } + + #[cfg(feature = "pool")] + fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) { + self.values.claim(pool); + if let Some(nulls) = &self.nulls { + nulls.claim(pool); + } + } } impl ArrayAccessor for &BooleanArray { @@ -534,12 +626,7 @@ impl BooleanArray { }); let values = BooleanBuffer::new(val_builder.into(), 0, data_len); - let nulls = Some(NullBuffer::new(BooleanBuffer::new( - null_builder.into(), - 0, - data_len, - ))) - .filter(|n| n.null_count() > 0); + let nulls = NullBuffer::from_unsliced_buffer(null_builder, data_len); BooleanArray::new(values, nulls) } } @@ -851,4 +938,128 @@ mod tests { assert!(sliced.is_valid(1)); assert!(!sliced.value(1)); } + + #[test] + fn test_has_true_has_false_all_true() { + let arr = BooleanArray::from(vec![true, true, true]); + assert!(arr.has_true()); + assert!(!arr.has_false()); + } + + #[test] + fn test_has_true_has_false_all_false() { + let arr = BooleanArray::from(vec![false, false, false]); + assert!(!arr.has_true()); + assert!(arr.has_false()); + } + + #[test] + fn test_has_true_has_false_mixed() { + let arr = BooleanArray::from(vec![true, false, true]); + assert!(arr.has_true()); + assert!(arr.has_false()); + } + + #[test] + fn test_has_true_has_false_empty() { + let arr = BooleanArray::from(Vec::::new()); + assert!(!arr.has_true()); + assert!(!arr.has_false()); + } + + #[test] + fn test_has_true_has_false_nulls_all_valid_true() { + let arr = BooleanArray::from(vec![Some(true), None, Some(true)]); + assert!(arr.has_true()); + assert!(!arr.has_false()); + } + + #[test] + fn test_has_true_has_false_nulls_all_valid_false() { + let arr = BooleanArray::from(vec![Some(false), None, Some(false)]); + assert!(!arr.has_true()); + assert!(arr.has_false()); + } + + #[test] + fn test_has_true_has_false_all_null() { + let arr = BooleanArray::new_null(5); + assert!(!arr.has_true()); + assert!(!arr.has_false()); + } + + #[test] + fn test_has_false_aligned_suffix_all_true() { + let arr = BooleanArray::from(vec![true; 129]); + assert!(arr.has_true()); + assert!(!arr.has_false()); + } + + #[test] + fn test_has_false_non_aligned_all_true() { + // 65 elements: exercises the remainder path in has_false + let arr = BooleanArray::from(vec![true; 65]); + assert!(arr.has_true()); + assert!(!arr.has_false()); + } + + #[test] + fn test_has_false_non_aligned_last_false() { + // 64 trues + 1 false: remainder path should find the false + let mut values = vec![true; 64]; + values.push(false); + let arr = BooleanArray::from(values); + assert!(arr.has_true()); + assert!(arr.has_false()); + } + + #[test] + fn test_has_false_exact_64_all_true() { + // Exactly 64 elements, no remainder + let arr = BooleanArray::from(vec![true; 64]); + assert!(arr.has_true()); + assert!(!arr.has_false()); + } + + #[test] + fn test_has_true_has_false_unaligned_slices() { + let cases = [ + (1, 129, true, false), + (3, 130, true, false), + (5, 65, true, false), + (7, 64, true, false), + ]; + + let base = BooleanArray::from(vec![true; 300]); + + for (offset, len, expected_has_true, expected_has_false) in cases { + let arr = base.slice(offset, len); + assert_eq!( + arr.has_true(), + expected_has_true, + "offset={offset} len={len}" + ); + assert_eq!( + arr.has_false(), + expected_has_false, + "offset={offset} len={len}" + ); + } + } + + #[test] + fn 
test_has_true_has_false_exact_multiples_of_64() { + let cases = [ + (64, true, false), + (128, true, false), + (192, true, false), + (256, true, false), + ]; + + for (len, expected_has_true, expected_has_false) in cases { + let arr = BooleanArray::from(vec![true; len]); + assert_eq!(arr.has_true(), expected_has_true, "len={len}"); + assert_eq!(arr.has_false(), expected_has_false, "len={len}"); + } + } } diff --git a/arrow-array/src/array/byte_array.rs b/arrow-array/src/array/byte_array.rs index a54e9a5fc781..93924ac76bb2 100644 --- a/arrow-array/src/array/byte_array.rs +++ b/arrow-array/src/array/byte_array.rs @@ -525,6 +525,15 @@ unsafe impl Array for GenericByteArray { fn get_array_memory_size(&self) -> usize { std::mem::size_of::() + self.get_buffer_memory_size() } + + #[cfg(feature = "pool")] + fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) { + self.value_offsets.claim(pool); + self.value_data.claim(pool); + if let Some(nulls) = &self.nulls { + nulls.claim(pool); + } + } } impl<'a, T: ByteArrayType> ArrayAccessor for &'a GenericByteArray { diff --git a/arrow-array/src/array/byte_view_array.rs b/arrow-array/src/array/byte_view_array.rs index 0275b628e2cf..a4a319df6426 100644 --- a/arrow-array/src/array/byte_view_array.rs +++ b/arrow-array/src/array/byte_view_array.rs @@ -897,6 +897,17 @@ unsafe impl Array for GenericByteViewArray { fn get_array_memory_size(&self) -> usize { std::mem::size_of::() + self.get_buffer_memory_size() } + + #[cfg(feature = "pool")] + fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) { + self.views.claim(pool); + for buffer in self.buffers.iter() { + buffer.claim(pool); + } + if let Some(nulls) = &self.nulls { + nulls.claim(pool); + } + } } impl<'a, T: ByteViewType + ?Sized> ArrayAccessor for &'a GenericByteViewArray { diff --git a/arrow-array/src/array/dictionary_array.rs b/arrow-array/src/array/dictionary_array.rs index 97e45cc5d68e..0c465ec14446 100644 --- a/arrow-array/src/array/dictionary_array.rs +++ b/arrow-array/src/array/dictionary_array.rs @@ -792,6 +792,12 @@ unsafe impl Array for DictionaryArray { + self.keys.get_buffer_memory_size() + self.values.get_array_memory_size() } + + #[cfg(feature = "pool")] + fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) { + self.keys.claim(pool); + self.values.claim(pool); + } } impl std::fmt::Debug for DictionaryArray { @@ -911,6 +917,11 @@ unsafe impl Array for TypedDictionaryArray<' fn get_array_memory_size(&self) -> usize { self.dictionary.get_array_memory_size() } + + #[cfg(feature = "pool")] + fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) { + self.dictionary.claim(pool); + } } impl IntoIterator for TypedDictionaryArray<'_, K, V> diff --git a/arrow-array/src/array/fixed_size_binary_array.rs b/arrow-array/src/array/fixed_size_binary_array.rs index f9a4919b2c30..72e6d022a58a 100644 --- a/arrow-array/src/array/fixed_size_binary_array.rs +++ b/arrow-array/src/array/fixed_size_binary_array.rs @@ -19,7 +19,7 @@ use crate::array::print_long_array; use crate::iterator::FixedSizeBinaryIter; use crate::{Array, ArrayAccessor, ArrayRef, FixedSizeListArray, Scalar}; use arrow_buffer::buffer::NullBuffer; -use arrow_buffer::{ArrowNativeType, BooleanBuffer, Buffer, MutableBuffer, bit_util}; +use arrow_buffer::{ArrowNativeType, Buffer, MutableBuffer, bit_util}; use arrow_data::{ArrayData, ArrayDataBuilder}; use arrow_schema::{ArrowError, DataType}; use std::any::Any; @@ -328,8 +328,7 @@ impl FixedSizeBinaryArray { )); } - let null_buf = BooleanBuffer::new(null_buf.into(), 0, len); - let nulls = 
Some(NullBuffer::new(null_buf)).filter(|n| n.null_count() > 0); + let nulls = NullBuffer::from_unsliced_buffer(null_buf, len); let size = size.unwrap_or(0) as i32; Ok(Self { @@ -406,8 +405,7 @@ impl FixedSizeBinaryArray { Ok(()) })?; - let null_buf = BooleanBuffer::new(null_buf.into(), 0, len); - let nulls = Some(NullBuffer::new(null_buf)).filter(|n| n.null_count() > 0); + let nulls = NullBuffer::from_unsliced_buffer(null_buf, len); Ok(Self { data_type: DataType::FixedSizeBinary(size), @@ -664,6 +662,14 @@ unsafe impl Array for FixedSizeBinaryArray { fn get_array_memory_size(&self) -> usize { std::mem::size_of::() + self.get_buffer_memory_size() } + + #[cfg(feature = "pool")] + fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) { + self.value_data.claim(pool); + if let Some(nulls) = &self.nulls { + nulls.claim(pool); + } + } } impl<'a> ArrayAccessor for &'a FixedSizeBinaryArray { diff --git a/arrow-array/src/array/fixed_size_list_array.rs b/arrow-array/src/array/fixed_size_list_array.rs index ce75855c6815..55a9fb9aa49e 100644 --- a/arrow-array/src/array/fixed_size_list_array.rs +++ b/arrow-array/src/array/fixed_size_list_array.rs @@ -528,6 +528,26 @@ unsafe impl Array for FixedSizeListArray { } size } + + #[cfg(feature = "pool")] + fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) { + self.values.claim(pool); + if let Some(nulls) = &self.nulls { + nulls.claim(pool); + } + } +} + +impl super::ListLikeArray for FixedSizeListArray { + fn values(&self) -> &ArrayRef { + self.values() + } + + fn element_range(&self, index: usize) -> std::ops::Range { + let value_length = self.value_length().as_usize(); + let offset = index * value_length; + offset..(offset + value_length) + } } impl ArrayAccessor for FixedSizeListArray { diff --git a/arrow-array/src/array/list_array.rs b/arrow-array/src/array/list_array.rs index e4c603e0d921..24f7774f2b7d 100644 --- a/arrow-array/src/array/list_array.rs +++ b/arrow-array/src/array/list_array.rs @@ -620,6 +620,28 @@ unsafe impl Array for GenericListArray } size } + + #[cfg(feature = "pool")] + fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) { + self.value_offsets.claim(pool); + self.values.claim(pool); + if let Some(nulls) = &self.nulls { + nulls.claim(pool); + } + } +} + +impl super::ListLikeArray for GenericListArray { + fn values(&self) -> &ArrayRef { + self.values() + } + + fn element_range(&self, index: usize) -> std::ops::Range { + let offsets = self.offsets(); + let start = offsets[index].as_usize(); + let end = offsets[index + 1].as_usize(); + start..end + } } impl ArrayAccessor for &GenericListArray { diff --git a/arrow-array/src/array/list_view_array.rs b/arrow-array/src/array/list_view_array.rs index b8d427d829c8..75ff6117eeba 100644 --- a/arrow-array/src/array/list_view_array.rs +++ b/arrow-array/src/array/list_view_array.rs @@ -486,6 +486,28 @@ unsafe impl Array for GenericListViewArray super::ListLikeArray for GenericListViewArray { + fn values(&self) -> &ArrayRef { + self.values() + } + + fn element_range(&self, index: usize) -> std::ops::Range { + let offset = self.value_offsets()[index].as_usize(); + let size = self.value_sizes()[index].as_usize(); + offset..(offset + size) + } } impl std::fmt::Debug for GenericListViewArray { diff --git a/arrow-array/src/array/map_array.rs b/arrow-array/src/array/map_array.rs index 07758d59bb14..7a5fe0b46843 100644 --- a/arrow-array/src/array/map_array.rs +++ b/arrow-array/src/array/map_array.rs @@ -430,6 +430,15 @@ unsafe impl Array for MapArray { } size } + + #[cfg(feature = "pool")] + fn 
claim(&self, pool: &dyn arrow_buffer::MemoryPool) { + self.value_offsets.claim(pool); + self.entries.claim(pool); + if let Some(nulls) = &self.nulls { + nulls.claim(pool); + } + } } impl ArrayAccessor for &MapArray { diff --git a/arrow-array/src/array/mod.rs b/arrow-array/src/array/mod.rs index 0d8125a2a1db..e389b462fbe1 100644 --- a/arrow-array/src/array/mod.rs +++ b/arrow-array/src/array/mod.rs @@ -354,6 +354,75 @@ pub unsafe trait Array: std::fmt::Debug + Send + Sync { /// This value will always be greater than returned by `get_buffer_memory_size()` and /// includes the overhead of the data structures that contain the pointers to the various buffers. fn get_array_memory_size(&self) -> usize; + + /// Claim memory used by this array in the provided memory pool. + /// + /// This recursively claims memory for: + /// - All data buffers in this array + /// - All child arrays (for nested types like List, Struct, etc.) + /// - The null bitmap buffer if present + /// + /// This method guarantees that the memory pool will only compute occupied memory + /// exactly once. For example, if this array is derived from operations like `slice`, + /// calling `claim` on it would not change the memory pool's usage if the underlying buffers + /// are already counted before. + /// + /// # Example + /// ``` + /// # use arrow_array::{Int32Array, Array}; + /// # use arrow_buffer::TrackingMemoryPool; + /// # use arrow_buffer::MemoryPool; + /// + /// let pool = TrackingMemoryPool::default(); + /// + /// let small_array = Int32Array::from(vec![1, 2, 3, 4, 5]); + /// let small_array_size = small_array.get_buffer_memory_size(); + /// + /// // Claim the array's memory in the pool + /// small_array.claim(&pool); + /// + /// // Create and claim slices of `small_array`; should not increase memory usage + /// let slice1 = small_array.slice(0, 2); + /// let slice2 = small_array.slice(2, 2); + /// slice1.claim(&pool); + /// slice2.claim(&pool); + /// + /// assert_eq!(pool.used(), small_array_size); + /// + /// // Create a `large_array` which does not derive from the original `small_array` + /// + /// let large_array = Int32Array::from((0..1000).collect::>()); + /// let large_array_size = large_array.get_buffer_memory_size(); + /// + /// large_array.claim(&pool); + /// + /// // Trying to claim more than once is a no-op + /// large_array.claim(&pool); + /// large_array.claim(&pool); + /// + /// assert_eq!(pool.used(), small_array_size + large_array_size); + /// + /// let sum_of_all_sizes = small_array_size + large_array_size + slice1.get_buffer_memory_size() + slice2.get_buffer_memory_size(); + /// + /// // `get_buffer_memory_size` works independently of the memory pool, so a sum of all the + /// // arrays in scope will always be >= the memory used reported by the memory pool. 
+ /// assert_ne!(pool.used(), sum_of_all_sizes); + /// + /// // Until the final claim is dropped the buffer size remains accounted for + /// drop(small_array); + /// drop(slice1); + /// + /// assert_eq!(pool.used(), small_array_size + large_array_size); + /// + /// // Dropping this finally releases the buffer that was backing `small_array` + /// drop(slice2); + /// + /// assert_eq!(pool.used(), large_array_size); + /// ``` + #[cfg(feature = "pool")] + fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) { + self.to_data().claim(pool) + } } /// A reference-counted reference to a generic `Array` @@ -437,6 +506,11 @@ unsafe impl Array for ArrayRef { fn get_array_memory_size(&self) -> usize { self.as_ref().get_array_memory_size() } + + #[cfg(feature = "pool")] + fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) { + self.as_ref().claim(pool) + } } unsafe impl Array for &T { @@ -507,6 +581,11 @@ unsafe impl Array for &T { fn get_array_memory_size(&self) -> usize { T::get_array_memory_size(self) } + + #[cfg(feature = "pool")] + fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) { + T::claim(self, pool) + } } /// A generic trait for accessing the values of an [`Array`] @@ -667,6 +746,21 @@ impl<'a> BinaryArrayType<'a> for &'a FixedSizeBinaryArray { } } +/// A trait for Arrow list-like arrays, abstracting over +/// [`GenericListArray`], [`GenericListViewArray`], and [`FixedSizeListArray`]. +/// +/// This trait provides a uniform interface for accessing the child values and +/// computing the element range for a given index, regardless of the underlying +/// list layout (offsets, offsets+sizes, or fixed-size). +pub trait ListLikeArray: Array { + /// Returns the child values array. + fn values(&self) -> &ArrayRef; + + /// Returns the start and end indices into the values array for the list + /// element at `index`. 
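Taken together, `values` and `element_range` let one routine handle every list layout. A sketch of the kind of generic helper the trait enables (the function is hypothetical, and the trait is assumed to be re-exported at the crate root):

```rust
use arrow_array::{Array, ArrayRef, ListLikeArray};

// Hypothetical helper: materialize a single list element as its own array,
// whether the input uses offsets, offsets plus sizes, or a fixed size
fn element(list: &dyn ListLikeArray, index: usize) -> ArrayRef {
    let range = list.element_range(index);
    list.values().slice(range.start, range.end - range.start)
}
```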
+ fn element_range(&self, index: usize) -> std::ops::Range; +} + impl PartialEq for dyn Array + '_ { fn eq(&self, other: &Self) -> bool { self.to_data().eq(&other.to_data()) diff --git a/arrow-array/src/array/null_array.rs b/arrow-array/src/array/null_array.rs index 00b30935d425..05dd114be71b 100644 --- a/arrow-array/src/array/null_array.rs +++ b/arrow-array/src/array/null_array.rs @@ -133,6 +133,11 @@ unsafe impl Array for NullArray { fn get_array_memory_size(&self) -> usize { std::mem::size_of::() } + + #[cfg(feature = "pool")] + fn claim(&self, _pool: &dyn arrow_buffer::MemoryPool) { + // NullArray has no buffers to claim + } } impl From for NullArray { diff --git a/arrow-array/src/array/primitive_array.rs b/arrow-array/src/array/primitive_array.rs index 29189b450a40..b51f5f518668 100644 --- a/arrow-array/src/array/primitive_array.rs +++ b/arrow-array/src/array/primitive_array.rs @@ -25,9 +25,7 @@ use crate::timezone::Tz; use crate::trusted_len::trusted_len_unzip; use crate::types::*; use crate::{Array, ArrayAccessor, ArrayRef, Scalar}; -use arrow_buffer::{ - ArrowNativeType, BooleanBuffer, Buffer, NullBuffer, NullBufferBuilder, ScalarBuffer, i256, -}; +use arrow_buffer::{ArrowNativeType, Buffer, NullBuffer, NullBufferBuilder, ScalarBuffer, i256}; use arrow_data::bit_iterator::try_for_each_valid_idx; use arrow_data::{ArrayData, ArrayDataBuilder}; use arrow_schema::{ArrowError, DataType}; @@ -1248,6 +1246,14 @@ unsafe impl Array for PrimitiveArray { fn get_array_memory_size(&self) -> usize { std::mem::size_of::() + self.get_buffer_memory_size() } + + #[cfg(feature = "pool")] + fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) { + self.values.claim(pool); + if let Some(nulls) = &self.nulls { + nulls.claim(pool); + } + } } impl ArrayAccessor for &PrimitiveArray { @@ -1490,8 +1496,7 @@ impl PrimitiveArray { let (null, buffer) = unsafe { trusted_len_unzip(iterator) }; - let nulls = - Some(NullBuffer::new(BooleanBuffer::new(null, 0, len))).filter(|n| n.null_count() > 0); + let nulls = NullBuffer::from_unsliced_buffer(null, len); PrimitiveArray::new(ScalarBuffer::from(buffer), nulls) } } diff --git a/arrow-array/src/array/run_array.rs b/arrow-array/src/array/run_array.rs index 4770bad05e7d..f317af6a10f0 100644 --- a/arrow-array/src/array/run_array.rs +++ b/arrow-array/src/array/run_array.rs @@ -123,6 +123,70 @@ impl RunArray { Ok(array_data.into()) } + /// Create a new [`RunArray`] from the provided parts, without validation + /// + /// # Safety + /// + /// Safe if [`Self::try_new`] would not error + pub unsafe fn new_unchecked( + data_type: DataType, + run_ends: RunEndBuffer, + values: ArrayRef, + ) -> Self { + if cfg!(feature = "force_validate") { + match &data_type { + DataType::RunEndEncoded(run_ends, values_field) => { + assert!(!run_ends.is_nullable(), "run_ends should not be nullable"); + assert_eq!( + run_ends.data_type(), + &R::DATA_TYPE, + "Incorrect run ends type" + ); + assert_eq!( + values_field.data_type(), + values.data_type(), + "Incorrect values type" + ); + } + _ => { + panic!( + "Invalid data type {data_type:?} for RunArray. Should be DataType::RunEndEncoded" + ); + } + } + + let run_array = Self { + data_type, + run_ends, + values, + }; + + // Safety: `validate_data` checks below + // 1. The given array data has exactly two child arrays. + // 2. The first child array (run_ends) has valid data type. + // 3. run_ends array does not have null values + // 4. run_ends array has non-zero and strictly increasing values. + // 5. 
The length of run_ends array and values array are the same. + run_array + .to_data() + .validate_data() + .expect("RunArray data should be valid"); + + return run_array; + } + + Self { + data_type, + run_ends, + values, + } + } + + /// Deconstruct this array into its constituent parts + pub fn into_parts(self) -> (DataType, RunEndBuffer, ArrayRef) { + (self.data_type, self.run_ends, self.values) + } + /// Returns a reference to the [`RunEndBuffer`]. pub fn run_ends(&self) -> &RunEndBuffer { &self.run_ends @@ -258,6 +322,7 @@ impl From for RunArray { let run_ends = unsafe { RunEndBuffer::new_unchecked(scalar, offset, len) }; let values = make_array(values_child); + Self { data_type, run_ends, @@ -375,6 +440,12 @@ unsafe impl Array for RunArray { + self.run_ends.inner().inner().capacity() + self.values.get_array_memory_size() } + + #[cfg(feature = "pool")] + fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) { + self.run_ends.claim(pool); + self.values.claim(pool); + } } impl std::fmt::Debug for RunArray { @@ -603,6 +674,11 @@ unsafe impl Array for TypedRunArray<'_, R, V> { fn get_array_memory_size(&self) -> usize { self.run_array.get_array_memory_size() } + + #[cfg(feature = "pool")] + fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) { + self.run_array.claim(pool); + } } // Array accessor converts the index of logical array to the index of the physical array @@ -1295,4 +1371,44 @@ mod tests { let slice3 = array1.slice(0, 4); // a, a, b, b assert_ne!(slice1, slice3); } + + #[test] + #[cfg(not(feature = "force_validate"))] + fn allow_to_create_invalid_array_using_new_unchecked() { + let valid = RunArray::::from_iter(["32"]); + let (_, buffer, values) = valid.into_parts(); + + let _ = unsafe { + // mismatch data type + RunArray::::new_unchecked(DataType::Int64, buffer, values) + }; + } + + #[test] + #[should_panic( + expected = "Invalid data type Int64 for RunArray. 
Should be DataType::RunEndEncoded" + )] + #[cfg(feature = "force_validate")] + fn should_not_be_able_to_create_invalid_array_using_new_unchecked_when_force_validate_is_enabled() + { + let valid = RunArray::::from_iter(["32"]); + let (_, buffer, values) = valid.into_parts(); + + let _ = unsafe { + // mismatch data type + RunArray::::new_unchecked(DataType::Int64, buffer, values) + }; + } + + #[test] + fn test_run_array_roundtrip() { + let run = Int32Array::from(vec![3, 6, 9, 12]); + let values = Int32Array::from(vec![Some(0), None, Some(1), None]); + let array = RunArray::try_new(&run, &values).unwrap(); + + let (dt, buffer, values) = array.clone().into_parts(); + let created_from_parts = + unsafe { RunArray::::new_unchecked(dt, buffer, values) }; + assert_eq!(array, created_from_parts); + } } diff --git a/arrow-array/src/array/struct_array.rs b/arrow-array/src/array/struct_array.rs index b5f25fff181c..da837ba16b75 100644 --- a/arrow-array/src/array/struct_array.rs +++ b/arrow-array/src/array/struct_array.rs @@ -468,6 +468,16 @@ unsafe impl Array for StructArray { } size } + + #[cfg(feature = "pool")] + fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) { + for field in &self.fields { + field.claim(pool); + } + if let Some(nulls) = &self.nulls { + nulls.claim(pool); + } + } } impl From> for StructArray { diff --git a/arrow-array/src/array/union_array.rs b/arrow-array/src/array/union_array.rs index 03d69a584524..5ba7b947c724 100644 --- a/arrow-array/src/array/union_array.rs +++ b/arrow-array/src/array/union_array.rs @@ -946,6 +946,17 @@ unsafe impl Array for UnionArray { .sum::() + sum } + + #[cfg(feature = "pool")] + fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) { + self.type_ids.claim(pool); + if let Some(offsets) = &self.offsets { + offsets.claim(pool); + } + for field in self.fields.iter().flatten() { + field.claim(pool); + } + } } impl std::fmt::Debug for UnionArray { diff --git a/arrow-array/src/builder/generic_bytes_builder.rs b/arrow-array/src/builder/generic_bytes_builder.rs index 7ed4bc5826c0..0a83ff989d4d 100644 --- a/arrow-array/src/builder/generic_bytes_builder.rs +++ b/arrow-array/src/builder/generic_bytes_builder.rs @@ -110,6 +110,21 @@ impl GenericByteBuilder { self.offsets_builder.push(self.next_offset()); } + /// Appends a value of type `T` into the builder `n` times. + /// + /// See [`Self::append_value`] for more panic information. + #[inline] + pub fn append_value_n(&mut self, value: impl AsRef, n: usize) { + let bytes: &[u8] = value.as_ref().as_ref(); + self.value_builder.reserve(bytes.len() * n); + self.offsets_builder.reserve(n); + for _ in 0..n { + self.value_builder.extend_from_slice(bytes); + self.offsets_builder.push(self.next_offset()); + } + self.null_buffer_builder.append_n_non_nulls(n); + } + /// Append an `Option` value into the builder. /// /// - A `None` value will append a null value. 
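A short sketch of the new repeat-append API using the `StringBuilder` alias (counts are illustrative); the reservation happens once up front rather than per value:

```rust
use arrow_array::builder::StringBuilder;

let mut builder = StringBuilder::new();
builder.append_value("hello");
builder.append_value_n("world", 1000);
let array = builder.finish();

assert_eq!(array.len(), 1001);
assert_eq!(array.value(1000), "world");
```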
@@ -939,4 +954,21 @@ mod tests { assert!(matches!(result, Err(ArrowError::OffsetOverflowError(_)))); } + + #[test] + fn test_append_value_n() { + let mut builder = GenericStringBuilder::::new(); + builder.append_value("hello"); + builder.append_value_n("world", 3); + builder.append_null(); + let array = builder.finish(); + + assert_eq!(5, array.len()); + assert_eq!(1, array.null_count()); + assert_eq!("hello", array.value(0)); + assert_eq!("world", array.value(1)); + assert_eq!("world", array.value(2)); + assert_eq!("world", array.value(3)); + assert!(array.is_null(4)); + } } diff --git a/arrow-array/src/builder/map_builder.rs b/arrow-array/src/builder/map_builder.rs index b70d4b73880b..5ff1625b4992 100644 --- a/arrow-array/src/builder/map_builder.rs +++ b/arrow-array/src/builder/map_builder.rs @@ -154,11 +154,9 @@ impl MapBuilder { (&mut self.key_builder, &mut self.value_builder) } - /// Finish the current map array slot - /// - /// Returns an error if the key and values builders are in an inconsistent state. + /// Validates that key and value builders have equal lengths. #[inline] - pub fn append(&mut self, is_valid: bool) -> Result<(), ArrowError> { + fn validate_equal_lengths(&self) -> Result<(), ArrowError> { if self.key_builder.len() != self.value_builder.len() { return Err(ArrowError::InvalidArgumentError(format!( "Cannot append to a map builder when its keys and values have unequal lengths of {} and {}", @@ -166,11 +164,32 @@ impl MapBuilder { self.value_builder.len() ))); } + Ok(()) + } + + /// Finish the current map array slot + /// + /// Returns an error if the key and values builders are in an inconsistent state. + #[inline] + pub fn append(&mut self, is_valid: bool) -> Result<(), ArrowError> { + self.validate_equal_lengths()?; self.offsets_builder.push(self.key_builder.len() as i32); self.null_buffer_builder.append(is_valid); Ok(()) } + /// Append `n` nulls to this [`MapBuilder`] + /// + /// Returns an error if the key and values builders are in an inconsistent state. 
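As with `append`, the bulk-null path validates the key and value builders first and then repeats the current offset, so no keys or values are consumed; a minimal sketch:

```rust
use arrow_array::builder::{Int32Builder, MapBuilder};

let mut builder = MapBuilder::new(None, Int32Builder::new(), Int32Builder::new());
builder.keys().append_value(1);
builder.values().append_value(10);
builder.append(true).unwrap();

// Three null map entries sharing the current offset
builder.append_nulls(3).unwrap();

let map = builder.finish();
assert_eq!(map.len(), 4);
assert_eq!(map.null_count(), 3);
```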
+ #[inline] + pub fn append_nulls(&mut self, n: usize) -> Result<(), ArrowError> { + self.validate_equal_lengths()?; + let offset = self.key_builder.len() as i32; + self.offsets_builder.extend(std::iter::repeat_n(offset, n)); + self.null_buffer_builder.append_n_nulls(n); + Ok(()) + } + /// Builds the [`MapArray`] pub fn finish(&mut self) -> MapArray { let len = self.len(); @@ -436,6 +455,42 @@ mod tests { ); } + #[test] + fn test_append_nulls() { + let mut builder = MapBuilder::new(None, Int32Builder::new(), Int32Builder::new()); + + builder.keys().append_value(1); + builder.values().append_value(100); + builder.append(true).unwrap(); + + builder.append_nulls(3).unwrap(); + + builder.keys().append_value(2); + builder.values().append_value(200); + builder.append(true).unwrap(); + + let map = builder.finish(); + assert_eq!(map.len(), 5); + assert_eq!(map.null_count(), 3); + assert!(map.is_valid(0)); + assert!(map.is_null(1)); + assert!(map.is_null(2)); + assert!(map.is_null(3)); + assert!(map.is_valid(4)); + assert_eq!(map.value_offsets(), &[0, 1, 1, 1, 1, 2]); + } + + #[test] + fn test_append_nulls_inconsistent_state() { + let mut builder = MapBuilder::new(None, Int32Builder::new(), Int32Builder::new()); + // Add a key without a matching value + builder.keys().append_value(1); + + let result = builder.append_nulls(2); + assert!(result.is_err()); + assert!(result.unwrap_err().to_string().contains("unequal lengths")); + } + #[test] #[should_panic(expected = "Keys field must not be nullable")] fn test_with_nullable_keys_field() { diff --git a/arrow-array/src/builder/primitive_run_builder.rs b/arrow-array/src/builder/primitive_run_builder.rs index 52bdaa6f40e4..c1dc0d8d7d4b 100644 --- a/arrow-array/src/builder/primitive_run_builder.rs +++ b/arrow-array/src/builder/primitive_run_builder.rs @@ -108,6 +108,20 @@ where prev_run_end_index: 0, } } + + /// Overrides the data type of the values child array. + /// + /// By default, `V::DATA_TYPE` is used (via [`PrimitiveBuilder`]). This + /// allows setting the timezone of a Timestamp, the precision & scale of a + /// Decimal, etc. + /// + /// # Panics + /// + /// This method panics if `values_builder` rejects `data_type`. + pub fn with_data_type(mut self, data_type: arrow_schema::DataType) -> Self { + self.values_builder = self.values_builder.with_data_type(data_type); + self + } } impl ArrayBuilder for PrimitiveRunBuilder @@ -259,10 +273,12 @@ where #[cfg(test)] mod tests { + use arrow_schema::DataType; + use crate::builder::PrimitiveRunBuilder; use crate::cast::AsArray; - use crate::types::{Int16Type, UInt32Type}; - use crate::{Array, UInt32Array}; + use crate::types::{Decimal128Type, Int16Type, TimestampMicrosecondType, UInt32Type}; + use crate::{Array, Decimal128Array, TimestampMicrosecondArray, UInt32Array}; #[test] fn test_primitive_ree_array_builder() { @@ -310,4 +326,38 @@ mod tests { &[1, 2, 5, 4, 6, 2] ); } + + #[test] + #[should_panic] + fn test_override_data_type_invalid() { + PrimitiveRunBuilder::::new().with_data_type(DataType::UInt64); + } + + #[test] + fn test_override_data_type() { + // Noop. + PrimitiveRunBuilder::::new().with_data_type(DataType::UInt32); + + // Setting scale & precision. + let mut builder = PrimitiveRunBuilder::::new() + .with_data_type(DataType::Decimal128(1, 2)); + builder.append_value(123); + let array = builder.finish(); + let array = array.downcast::().unwrap(); + let values = array.values(); + assert_eq!(values.precision(), 1); + assert_eq!(values.scale(), 2); + + // Setting timezone. 
+ let mut builder = PrimitiveRunBuilder::::new() + .with_data_type(DataType::Timestamp( + arrow_schema::TimeUnit::Microsecond, + Some("Europe/Paris".into()), + )); + builder.append_value(1); + let array = builder.finish(); + let array = array.downcast::().unwrap(); + let values = array.values(); + assert_eq!(values.timezone(), Some("Europe/Paris")); + } } diff --git a/arrow-array/src/builder/struct_builder.rs b/arrow-array/src/builder/struct_builder.rs index ad58e008572f..795593c98a8a 100644 --- a/arrow-array/src/builder/struct_builder.rs +++ b/arrow-array/src/builder/struct_builder.rs @@ -213,6 +213,12 @@ impl StructBuilder { self.null_buffer_builder.append(is_valid); } + /// Appends `n` non-null entries into the builder. + #[inline] + pub fn append_non_nulls(&mut self, n: usize) { + self.null_buffer_builder.append_n_non_nulls(n); + } + /// Appends a null element to the struct. #[inline] pub fn append_null(&mut self) { @@ -727,4 +733,60 @@ mod tests { assert!(a1.is_valid(0)); assert!(a1.is_null(1)); } + + #[test] + fn test_append_non_nulls() { + let int_builder = Int32Builder::new(); + let fields = vec![Field::new("f1", DataType::Int32, false)]; + let field_builders = vec![Box::new(int_builder) as Box]; + + let mut builder = StructBuilder::new(fields, field_builders); + builder + .field_builder::(0) + .unwrap() + .append_slice(&[1, 2, 3, 4, 5]); + builder.append_non_nulls(5); + + let arr = builder.finish(); + assert_eq!(arr.len(), 5); + assert_eq!(arr.null_count(), 0); + for i in 0..5 { + assert!(arr.is_valid(i)); + } + } + + #[test] + fn test_append_non_nulls_with_nulls() { + let mut builder = StructBuilder::new(Fields::empty(), vec![]); + builder.append_null(); + builder.append_non_nulls(3); + builder.append_nulls(2); + builder.append_non_nulls(1); + + let arr = builder.finish(); + assert_eq!(arr.len(), 7); + assert_eq!(arr.null_count(), 3); + assert!(arr.is_null(0)); + assert!(arr.is_valid(1)); + assert!(arr.is_valid(2)); + assert!(arr.is_valid(3)); + assert!(arr.is_null(4)); + assert!(arr.is_null(5)); + assert!(arr.is_valid(6)); + } + + #[test] + fn test_append_non_nulls_zero() { + let mut builder = StructBuilder::new(Fields::empty(), vec![]); + builder.append_non_nulls(0); + assert_eq!(builder.len(), 0); + + builder.append(true); + builder.append_non_nulls(0); + assert_eq!(builder.len(), 1); + + let arr = builder.finish(); + assert_eq!(arr.len(), 1); + assert_eq!(arr.null_count(), 0); + } } diff --git a/arrow-array/src/cast.rs b/arrow-array/src/cast.rs index de590ff87c77..d6cc242e0267 100644 --- a/arrow-array/src/cast.rs +++ b/arrow-array/src/cast.rs @@ -74,7 +74,7 @@ macro_rules! repeat_pat { /// [`DataType`]: arrow_schema::DataType #[macro_export] macro_rules! downcast_integer { - ($($data_type:expr),+ => ($m:path $(, $args:tt)*), $($p:pat $(if $pred:expr)* => $fallback:expr $(,)*)*) => { + ($($data_type:expr),+ => ($m:path $(, $args:tt)*), $($p:pat $(if $pred:expr)? => $fallback:expr $(,)?)*) => { match ($($data_type),+) { $crate::repeat_pat!($crate::cast::__private::DataType::Int8, $($data_type),+) => { $m!($crate::types::Int8Type $(, $args)*) @@ -100,7 +100,7 @@ macro_rules! downcast_integer { $crate::repeat_pat!($crate::cast::__private::DataType::UInt64, $($data_type),+) => { $m!($crate::types::UInt64Type $(, $args)*) } - $($p $(if $pred)* => $fallback,)* + $($p $(if $pred)? => $fallback,)* } }; } @@ -138,21 +138,24 @@ macro_rules! downcast_integer { /// [`DataType`]: arrow_schema::DataType #[macro_export] macro_rules! 
downcast_integer_array { - ($values:ident => $e:expr, $($p:pat $(if $pred:expr)* => $fallback:expr $(,)*)*) => { - $crate::downcast_integer_array!($values => {$e} $($p $(if $pred)* => $fallback)*) - }; - (($($values:ident),+) => $e:expr, $($p:pat $(if $pred:expr)* => $fallback:expr $(,)*)*) => { - $crate::downcast_integer_array!($($values),+ => {$e} $($p $(if $pred)* => $fallback)*) - }; - ($($values:ident),+ => $e:block $($p:pat $(if $pred:expr)* => $fallback:expr $(,)*)*) => { - $crate::downcast_integer_array!(($($values),+) => $e $($p $(if $pred)* => $fallback)*) - }; - (($($values:ident),+) => $e:block $($p:pat $(if $pred:expr)* => $fallback:expr $(,)*)*) => { + ($($values:ident),+ => $e:block $($p:pat $(if $pred:expr)? => $fallback:expr $(,)?)*) => { $crate::downcast_integer!{ $($values.data_type()),+ => ($crate::downcast_primitive_array_helper, $($values),+, $e), - $($p $(if $pred)* => $fallback,)* + $($p $(if $pred)? => $fallback,)* } }; + // Turn $e into a block. + ($values:ident => $e:expr, $($p:pat $(if $pred:expr)? => $fallback:expr $(,)?)*) => { + $crate::downcast_integer_array!($values => {$e} $($p $(if $pred)? => $fallback,)*) + }; + // Remove $values parentheses. + (($($values:ident),+) => $e:block $($p:pat $(if $pred:expr)? => $fallback:expr $(,)?)*) => { + $crate::downcast_integer_array!($($values),+ => $e $($p $(if $pred)? => $fallback,)*) + }; + // Turn $e into a block & remove $values parentheses. + (($($values:ident),+) => $e:expr, $($p:pat $(if $pred:expr)? => $fallback:expr $(,)?)*) => { + $crate::downcast_integer_array!($($values),+ => {$e} $($p $(if $pred)? => $fallback,)*) + }; } /// Given one or more expressions evaluating to an integer [`DataType`] invokes the provided macro @@ -189,7 +192,7 @@ macro_rules! downcast_integer_array { /// [`DataType`]: arrow_schema::DataType #[macro_export] macro_rules! downcast_run_end_index { - ($($data_type:expr),+ => ($m:path $(, $args:tt)*), $($p:pat $(if $pred:expr)* => $fallback:expr $(,)*)*) => { + ($($data_type:expr),+ => ($m:path $(, $args:tt)*), $($p:pat $(if $pred:expr)? => $fallback:expr $(,)?)*) => { match ($($data_type),+) { $crate::repeat_pat!($crate::cast::__private::DataType::Int16, $($data_type),+) => { $m!($crate::types::Int16Type $(, $args)*) @@ -200,7 +203,7 @@ macro_rules! downcast_run_end_index { $crate::repeat_pat!($crate::cast::__private::DataType::Int64, $($data_type),+) => { $m!($crate::types::Int64Type $(, $args)*) } - $($p $(if $pred)* => $fallback,)* + $($p $(if $pred)? => $fallback,)* } }; } @@ -234,7 +237,7 @@ macro_rules! downcast_run_end_index { /// [`DataType`]: arrow_schema::DataType #[macro_export] macro_rules! downcast_temporal { - ($($data_type:expr),+ => ($m:path $(, $args:tt)*), $($p:pat $(if $pred:expr)* => $fallback:expr $(,)*)*) => { + ($($data_type:expr),+ => ($m:path $(, $args:tt)*), $($p:pat $(if $pred:expr)? => $fallback:expr $(,)?)*) => { match ($($data_type),+) { $crate::repeat_pat!($crate::cast::__private::DataType::Time32($crate::cast::__private::TimeUnit::Second), $($data_type),+) => { $m!($crate::types::Time32SecondType $(, $args)*) @@ -266,7 +269,7 @@ macro_rules! downcast_temporal { $crate::repeat_pat!($crate::cast::__private::DataType::Timestamp($crate::cast::__private::TimeUnit::Nanosecond, _), $($data_type),+) => { $m!($crate::types::TimestampNanosecondType $(, $args)*) } - $($p $(if $pred)* => $fallback,)* + $($p $(if $pred)? => $fallback,)* } }; } @@ -304,21 +307,24 @@ macro_rules! downcast_temporal { /// [`DataType`]: arrow_schema::DataType #[macro_export] macro_rules! 
downcast_temporal_array { - ($values:ident => $e:expr, $($p:pat $(if $pred:expr)* => $fallback:expr $(,)*)*) => { - $crate::downcast_temporal_array!($values => {$e} $($p $(if $pred)* => $fallback)*) - }; - (($($values:ident),+) => $e:expr, $($p:pat $(if $pred:expr)* => $fallback:expr $(,)*)*) => { - $crate::downcast_temporal_array!($($values),+ => {$e} $($p $(if $pred)* => $fallback)*) - }; - ($($values:ident),+ => $e:block $($p:pat $(if $pred:expr)* => $fallback:expr $(,)*)*) => { - $crate::downcast_temporal_array!(($($values),+) => $e $($p $(if $pred)* => $fallback)*) - }; - (($($values:ident),+) => $e:block $($p:pat $(if $pred:expr)* => $fallback:expr $(,)*)*) => { + ($($values:ident),+ => $e:block $($p:pat $(if $pred:expr)? => $fallback:expr $(,)?)*) => { $crate::downcast_temporal!{ $($values.data_type()),+ => ($crate::downcast_primitive_array_helper, $($values),+, $e), - $($p $(if $pred)* => $fallback,)* + $($p $(if $pred)? => $fallback,)* } }; + // Turn $e into a block. + ($values:ident => $e:expr, $($p:pat $(if $pred:expr)? => $fallback:expr $(,)?)*) => { + $crate::downcast_temporal_array!($values => {$e} $($p $(if $pred)? => $fallback,)*) + }; + // Remove $values parentheses. + (($($values:ident),+) => $e:block $($p:pat $(if $pred:expr)? => $fallback:expr $(,)?)*) => { + $crate::downcast_temporal_array!($($values),+ => $e $($p $(if $pred)? => $fallback,)*) + }; + // Turn $e into a block & remove $values parentheses. + (($($values:ident),+) => $e:expr, $($p:pat $(if $pred:expr)? => $fallback:expr $(,)?)*) => { + $crate::downcast_temporal_array!($($values),+ => {$e} $($p $(if $pred)? => $fallback,)*) + }; } /// Given one or more expressions evaluating to primitive [`DataType`] invokes the provided macro @@ -353,7 +359,7 @@ macro_rules! downcast_temporal_array { /// [`DataType`]: arrow_schema::DataType #[macro_export] macro_rules! downcast_primitive { - ($($data_type:expr),+ => ($m:path $(, $args:tt)*), $($p:pat $(if $pred:expr)* => $fallback:expr $(,)*)*) => { + ($($data_type:expr),+ => ($m:path $(, $args:tt)*), $($p:pat $(if $pred:expr)? => $fallback:expr $(,)?)*) => { $crate::downcast_integer! { $($data_type),+ => ($m $(, $args)*), $crate::repeat_pat!($crate::cast::__private::DataType::Float16, $($data_type),+) => { @@ -401,7 +407,7 @@ macro_rules! downcast_primitive { _ => { $crate::downcast_temporal! { $($data_type),+ => ($m $(, $args)*), - $($p $(if $pred)* => $fallback,)* + $($p $(if $pred)? => $fallback,)* } } } @@ -450,21 +456,24 @@ macro_rules! downcast_primitive_array_helper { /// [`DataType`]: arrow_schema::DataType #[macro_export] macro_rules! downcast_primitive_array { - ($values:ident => $e:expr, $($p:pat $(if $pred:expr)* => $fallback:expr $(,)*)*) => { - $crate::downcast_primitive_array!($values => {$e} $($p $(if $pred)* => $fallback)*) - }; - (($($values:ident),+) => $e:expr, $($p:pat $(if $pred:expr)* => $fallback:expr $(,)*)*) => { - $crate::downcast_primitive_array!($($values),+ => {$e} $($p $(if $pred)* => $fallback)*) - }; - ($($values:ident),+ => $e:block $($p:pat $(if $pred:expr)* => $fallback:expr $(,)*)*) => { - $crate::downcast_primitive_array!(($($values),+) => $e $($p $(if $pred)* => $fallback)*) - }; - (($($values:ident),+) => $e:block $($p:pat $(if $pred:expr)* => $fallback:expr $(,)*)*) => { + ($($values:ident),+ => $e:block $($p:pat $(if $pred:expr)? 
=> $fallback:expr $(,)?)*) => { $crate::downcast_primitive!{ $($values.data_type()),+ => ($crate::downcast_primitive_array_helper, $($values),+, $e), - $($p $(if $pred)* => $fallback,)* + $($p $(if $pred)? => $fallback,)* } }; + // Turn $e into a block. + ($values:ident => $e:expr, $($p:pat $(if $pred:expr)? => $fallback:expr $(,)?)*) => { + $crate::downcast_primitive_array!($values => {$e} $($p $(if $pred)? => $fallback,)*) + }; + // Remove $values parentheses. + (($($values:ident),+) => $e:block $($p:pat $(if $pred:expr)? => $fallback:expr $(,)?)*) => { + $crate::downcast_primitive_array!($($values),+ => $e $($p $(if $pred)? => $fallback,)*) + }; + // Turn $e into a block & remove $values parentheses. + (($($values:ident),+) => $e:expr, $($p:pat $(if $pred:expr)? => $fallback:expr $(,)?)*) => { + $crate::downcast_primitive_array!($($values),+ => {$e} $($p $(if $pred)? => $fallback,)*) + }; } /// Force downcast of an [`Array`], such as an [`ArrayRef`], to @@ -546,11 +555,11 @@ macro_rules! downcast_dictionary_array_helper { /// [`DataType`]: arrow_schema::DataType #[macro_export] macro_rules! downcast_dictionary_array { - ($values:ident => $e:expr, $($p:pat $(if $pred:expr)* => $fallback:expr $(,)*)*) => { - downcast_dictionary_array!($values => {$e} $($p $(if $pred)* => $fallback)*) + ($values:ident => $e:expr, $($p:pat $(if $pred:expr)? => $fallback:expr $(,)?)*) => { + downcast_dictionary_array!($values => {$e} $($p $(if $pred)? => $fallback,)*) }; - ($values:ident => $e:block $($p:pat $(if $pred:expr)* => $fallback:expr $(,)*)*) => { + ($values:ident => $e:block $($p:pat $(if $pred:expr)? => $fallback:expr $(,)?)*) => { match $values.data_type() { $crate::cast::__private::DataType::Dictionary(k, _) => { $crate::downcast_integer! { @@ -558,7 +567,7 @@ macro_rules! downcast_dictionary_array { k => unreachable!("unsupported dictionary key type: {}", k) } } - $($p $(if $pred)* => $fallback,)* + $($p $(if $pred)? => $fallback,)* } } } @@ -654,11 +663,11 @@ macro_rules! downcast_run_array_helper { /// [`DataType`]: arrow_schema::DataType #[macro_export] macro_rules! downcast_run_array { - ($values:ident => $e:expr, $($p:pat $(if $pred:expr)* => $fallback:expr $(,)*)*) => { - downcast_run_array!($values => {$e} $($p $(if $pred)* => $fallback)*) + ($values:ident => $e:expr, $($p:pat $(if $pred:expr)? => $fallback:expr $(,)?)*) => { + downcast_run_array!($values => {$e} $($p $(if $pred)? => $fallback,)*) }; - ($values:ident => $e:block $($p:pat $(if $pred:expr)* => $fallback:expr $(,)*)*) => { + ($values:ident => $e:block $($p:pat $(if $pred:expr)? => $fallback:expr $(,)?)*) => { match $values.data_type() { $crate::cast::__private::DataType::RunEndEncoded(k, _) => { $crate::downcast_run_end_index! { @@ -666,7 +675,7 @@ macro_rules! downcast_run_array { k => unreachable!("unsupported run end index type: {}", k) } } - $($p $(if $pred)* => $fallback,)* + $($p $(if $pred)? => $fallback,)* } } } diff --git a/arrow-array/src/record_batch.rs b/arrow-array/src/record_batch.rs index cfec969165a9..f400ac4d0de9 100644 --- a/arrow-array/src/record_batch.rs +++ b/arrow-array/src/record_batch.rs @@ -135,6 +135,18 @@ macro_rules! 
create_array { ($ty: tt, [$($values: expr),*]) => { std::sync::Arc::new(<$crate::create_array!(@from $ty)>::from(vec![$($values),*])) }; + + (Binary, $values: expr) => { + std::sync::Arc::new($crate::BinaryArray::from_vec($values)) + }; + + (LargeBinary, $values: expr) => { + std::sync::Arc::new($crate::LargeBinaryArray::from_vec($values)) + }; + + ($ty: tt, $values: expr) => { + std::sync::Arc::new(<$crate::create_array!(@from $ty)>::from($values)) + }; } /// Creates a record batch from literal slice of values, suitable for rapid @@ -152,10 +164,22 @@ macro_rules! create_array { /// ("c", Utf8, ["alpha", "beta", "gamma"]) /// ); /// ``` +/// +/// Variables and expressions are also supported: +/// +/// ```rust +/// use arrow_array::record_batch; +/// +/// let values = vec![1, 2, 3]; +/// let batch = record_batch!( +/// ("a", Int32, values), +/// ("b", Float64, vec![Some(4.0), None, Some(5.0)]) +/// ); +/// ``` /// Due to limitation of [`create_array!`] macro, support for limited data types is available. #[macro_export] macro_rules! record_batch { - ($(($name: expr, $type: ident, [$($values: expr),*])),*) => { + ($(($name: expr, $type: ident, $($values: tt)+)),*) => { { let schema = std::sync::Arc::new(arrow_schema::Schema::new(vec![ $( @@ -163,16 +187,14 @@ macro_rules! record_batch { )* ])); - let batch = $crate::RecordBatch::try_new( + $crate::RecordBatch::try_new( schema, vec![$( - $crate::create_array!($type, [$($values),*]), + $crate::create_array!($type, $($values)+), )*] - ); - - batch + ) } - } + }; } /// A two-dimensional batch of column-oriented data with a defined @@ -773,6 +795,20 @@ impl RecordBatch { RecordBatch::try_new(schema, columns) } + /// Registers all buffers in this record batch with the provided [`MemoryPool`]. + /// + /// This claims memory for all columns in the batch by calling [`Array::claim`] + /// on each column. + /// + /// [`MemoryPool`]: arrow_buffer::MemoryPool + /// [`Array::claim`]: crate::Array::claim + #[cfg(feature = "pool")] + pub fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) { + for column in self.columns() { + column.claim(pool); + } + } + /// Returns the total number of bytes of memory occupied physically by this batch. 
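With the new `$values: tt` arm, column data may be any expression rather than only a bracketed literal list, and the dedicated `Binary`/`LargeBinary` arms route through `from_vec`; a minimal sketch:

```rust
use arrow_array::record_batch;

let ids = vec![1i32, 2, 3];
let payloads = vec![b"a".as_slice(), b"bb".as_slice(), b"ccc".as_slice()];

let batch = record_batch!(("id", Int32, ids), ("payload", Binary, payloads)).unwrap();
assert_eq!(batch.num_rows(), 3);
```

When the `pool` feature is enabled, the resulting batch's buffers can then be registered with a memory pool via `batch.claim(&pool)`.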
/// /// Note that this does not always correspond to the exact memory usage of a @@ -981,6 +1017,35 @@ mod tests { assert_eq!(5, record_batch.column(1).len()); } + #[test] + fn create_binary_record_batch_from_variables() { + let binary_values = vec![b"a".as_slice()]; + let large_binary_values = vec![b"xxx".as_slice()]; + + let record_batch = record_batch!( + ("a", Binary, binary_values), + ("b", LargeBinary, large_binary_values) + ) + .unwrap(); + + assert_eq!(1, record_batch.num_rows()); + assert_eq!(2, record_batch.num_columns()); + assert_eq!( + &DataType::Binary, + record_batch.schema().field(0).data_type() + ); + assert_eq!( + &DataType::LargeBinary, + record_batch.schema().field(1).data_type() + ); + + let binary = record_batch.column(0).as_binary::(); + assert_eq!(b"a", binary.value(0)); + + let large_binary = record_batch.column(1).as_binary::(); + assert_eq!(b"xxx", large_binary.value(0)); + } + #[test] fn byte_size_should_not_regress() { let schema = Schema::new(vec![ diff --git a/arrow-array/src/types.rs b/arrow-array/src/types.rs index ff1caaacaecc..267011d8af80 100644 --- a/arrow-array/src/types.rs +++ b/arrow-array/src/types.rs @@ -324,6 +324,7 @@ pub trait ArrowTimestampType: ArrowTemporalType { /// Creates a ArrowTimestampType::Native from the provided [`NaiveDateTime`] /// /// See [`DataType::Timestamp`] for more information on timezone handling + #[deprecated(since = "58.1.0", note = "Use from_naive_datetime instead")] fn make_value(naive: NaiveDateTime) -> Option; /// Creates a timestamp value from a [`DateTime`] in any timezone. @@ -350,7 +351,7 @@ pub trait ArrowTimestampType: ArrowTemporalType { chrono::offset::LocalResult::Ambiguous(dt1, _) => Self::from_datetime(dt1), chrono::offset::LocalResult::None => None, }, - None => Self::make_value(naive), + None => Self::from_datetime(naive.and_utc()), } } } @@ -416,8 +417,7 @@ fn add_year_months( let months = IntervalYearMonthType::to_months(delta); let res = as_datetime_with_timezone::(timestamp, tz)?; let res = add_months_datetime(res, months)?; - let res = res.naive_utc(); - T::make_value(res) + T::from_naive_datetime(res.naive_utc(), None) } fn add_day_time( @@ -429,8 +429,7 @@ fn add_day_time( let res = as_datetime_with_timezone::(timestamp, tz)?; let res = add_days_datetime(res, days)?; let res = res.checked_add_signed(Duration::try_milliseconds(ms as i64)?)?; - let res = res.naive_utc(); - T::make_value(res) + T::from_naive_datetime(res.naive_utc(), None) } fn add_month_day_nano( @@ -443,8 +442,7 @@ fn add_month_day_nano( let res = add_months_datetime(res, months)?; let res = add_days_datetime(res, days)?; let res = res.checked_add_signed(Duration::nanoseconds(nanos))?; - let res = res.naive_utc(); - T::make_value(res) + T::from_naive_datetime(res.naive_utc(), None) } fn subtract_year_months( @@ -455,8 +453,7 @@ fn subtract_year_months( let months = IntervalYearMonthType::to_months(delta); let res = as_datetime_with_timezone::(timestamp, tz)?; let res = sub_months_datetime(res, months)?; - let res = res.naive_utc(); - T::make_value(res) + T::from_naive_datetime(res.naive_utc(), None) } fn subtract_day_time( @@ -468,8 +465,7 @@ fn subtract_day_time( let res = as_datetime_with_timezone::(timestamp, tz)?; let res = sub_days_datetime(res, days)?; let res = res.checked_sub_signed(Duration::try_milliseconds(ms as i64)?)?; - let res = res.naive_utc(); - T::make_value(res) + T::from_naive_datetime(res.naive_utc(), None) } fn subtract_month_day_nano( @@ -482,8 +478,7 @@ fn subtract_month_day_nano( let res = 
sub_months_datetime(res, months)?; let res = sub_days_datetime(res, days)?; let res = res.checked_sub_signed(Duration::nanoseconds(nanos))?; - let res = res.naive_utc(); - T::make_value(res) + T::from_naive_datetime(res.naive_utc(), None) } impl TimestampSecondType { diff --git a/arrow-avro/Cargo.toml b/arrow-avro/Cargo.toml index b7cd7eeb1984..f46ef7e7b999 100644 --- a/arrow-avro/Cargo.toml +++ b/arrow-avro/Cargo.toml @@ -56,7 +56,7 @@ arrow-buffer = { workspace = true } arrow-array = { workspace = true } arrow-select = { workspace = true, optional = true } -object_store = { version = "0.13", default-features = false, optional = true } +object_store = { workspace = true, optional = true } bytes = { version = "1.11.0", default-features = false, features = ["std"] } serde_json = { version = "1.0", default-features = false, features = ["std"] } @@ -70,12 +70,12 @@ zstd = { version = "0.13", default-features = false, optional = true } bzip2 = { version = "0.6.0", optional = true } xz = { package = "liblzma", version = "0.4", default-features = false, optional = true } crc = { version = "3.0", optional = true } -strum_macros = "0.27" +strum_macros = "0.28" uuid = "1.17" indexmap = "2.10" rand = "0.9" md5 = { version = "0.8", optional = true } -sha2 = { version = "0.10", optional = true } +sha2 = { version = "0.11", optional = true } tokio = { version = "1.0", optional = true, default-features = false, features = ["macros", "rt", "io-util"] } [dev-dependencies] @@ -93,7 +93,7 @@ futures = "0.3.31" async-stream = "0.3.6" apache-avro = "0.21.0" num-bigint = "0.4" -object_store = { version = "0.13", default-features = false, features = ["fs"] } +object_store = { workspace = true, features = ["fs"] } once_cell = "1.21.3" half = { version = "2.1", default-features = false } tokio = { version = "1.0", default-features = false, features = ["macros", "rt-multi-thread", "io-util", "fs"] } diff --git a/arrow-avro/benches/project_record.rs b/arrow-avro/benches/project_record.rs index 9bddfea93bb8..91bece6d7e21 100644 --- a/arrow-avro/benches/project_record.rs +++ b/arrow-avro/benches/project_record.rs @@ -121,7 +121,22 @@ fn gen_double(mut rng: impl Rng, sc: &ApacheSchema, n: usize, prefix: &[u8]) -> ) } -const READER_SCHEMA: &str = r#" +fn gen_mixed(mut rng: impl Rng, sc: &ApacheSchema, n: usize, prefix: &[u8]) -> Vec { + encode_records_with_prefix( + sc, + prefix, + (0..n).map(|i| { + Value::Record(vec![ + ("f1".into(), Value::Int(rng.random())), + ("f2".into(), Value::Long(rng.random())), + ("f3".into(), Value::String(format!("name-{i}"))), + ("f4".into(), Value::Double(rng.random())), + ]) + }), + ) +} + +const SKIP_READER_SCHEMA: &str = r#" { "type":"record", "name":"table", @@ -175,11 +190,42 @@ const DOUBLE_SCHEMA: &str = r#" } "#; -fn new_decoder(schema_json: &'static str, batch_size: usize) -> Decoder { +const MIX_SCHEMA: &str = r#" + { + "type":"record", + "name":"Mix", + "fields": [ + { "name": "f1", "type": "int" }, + { "name": "f2", "type": "long" }, + { "name": "f3", "type": "string" }, + { "name": "f4", "type": "double" } + ] + } + "#; + +// Project the record type written to MIX_SCHEMA: +// skip "f2" and "f4", add "f5" with a default +const PROJECT_READER_SCHEMA: &str = r#" + { + "type":"record", + "name":"Mix", + "fields": [ + { "name": "f1", "type": "int" }, + { "name": "f3", "type": "string" }, + { "name": "f5", "type": "long", "default": 0 } + ] + } + "#; + +fn new_decoder( + schema_json: &'static str, + reader_schema_json: &'static str, + batch_size: usize, +) -> Decoder { let schema =
AvroSchema::new(schema_json.to_owned()); let mut store = SchemaStore::new(); store.register(schema).unwrap(); - let reader_schema = AvroSchema::new(READER_SCHEMA.to_owned()); + let reader_schema = AvroSchema::new(reader_schema_json.to_owned()); ReaderBuilder::new() .with_writer_schema_store(store) .with_batch_size(batch_size) @@ -215,19 +261,24 @@ fn bench_with_decoder( fn criterion_benches(c: &mut Criterion) { let data = gen_avro_data_with(INT_SCHEMA, NUM_ROWS, gen_int); bench_with_decoder(c, "skip_int", &data, NUM_ROWS, || { - new_decoder(INT_SCHEMA, BATCH_SIZE) + new_decoder(INT_SCHEMA, SKIP_READER_SCHEMA, BATCH_SIZE) }); let data = gen_avro_data_with(LONG_SCHEMA, NUM_ROWS, gen_long); bench_with_decoder(c, "skip_long", &data, NUM_ROWS, || { - new_decoder(LONG_SCHEMA, BATCH_SIZE) + new_decoder(LONG_SCHEMA, SKIP_READER_SCHEMA, BATCH_SIZE) }); let data = gen_avro_data_with(FLOAT_SCHEMA, NUM_ROWS, gen_float); bench_with_decoder(c, "skip_float", &data, NUM_ROWS, || { - new_decoder(FLOAT_SCHEMA, BATCH_SIZE) + new_decoder(FLOAT_SCHEMA, SKIP_READER_SCHEMA, BATCH_SIZE) }); let data = gen_avro_data_with(DOUBLE_SCHEMA, NUM_ROWS, gen_double); bench_with_decoder(c, "skip_double", &data, NUM_ROWS, || { - new_decoder(DOUBLE_SCHEMA, BATCH_SIZE) + new_decoder(DOUBLE_SCHEMA, SKIP_READER_SCHEMA, BATCH_SIZE) + }); + + let data = gen_avro_data_with(MIX_SCHEMA, NUM_ROWS, gen_mixed); + bench_with_decoder(c, "project_primitives", &data, NUM_ROWS, || { + new_decoder(MIX_SCHEMA, PROJECT_READER_SCHEMA, BATCH_SIZE) }); } diff --git a/arrow-avro/src/codec.rs b/arrow-avro/src/codec.rs index d54c6602dad6..92a0ed051951 100644 --- a/arrow-avro/src/codec.rs +++ b/arrow-avro/src/codec.rs @@ -84,14 +84,20 @@ pub(crate) enum AvroLiteral { /// Contains the necessary information to resolve a writer's record against a reader's record schema. #[derive(Debug, Clone, PartialEq)] pub(crate) struct ResolvedRecord { - /// Maps a writer's field index to the corresponding reader's field index. - /// `None` if the writer's field is not present in the reader's schema. - pub(crate) writer_to_reader: Arc<[Option]>, + /// Maps a writer's field index to the field's resolution against the reader's schema. + pub(crate) writer_fields: Arc<[ResolvedField]>, /// A list of indices in the reader's schema for fields that have a default value. pub(crate) default_fields: Arc<[usize]>, +} + +/// Resolution information for record fields in the writer schema. +#[derive(Debug, Clone, PartialEq)] +pub(crate) enum ResolvedField { + /// Resolves to a field indexed in the reader schema. + ToReader(usize), /// For fields present in the writer's schema but not the reader's, this stores their data type. /// This is needed to correctly skip over these fields during deserialization. - pub(crate) skip_fields: Arc<[Option]>, + Skip(AvroDataType), } /// Defines the type of promotion to be applied during schema resolution. @@ -141,7 +147,7 @@ impl Display for Promotion { pub(crate) struct ResolvedUnion { /// For each writer branch index, the reader branch index and how to read it. /// `None` means the writer branch doesn't resolve against the reader. 
- pub(crate) writer_to_reader: Arc<[Option<(usize, Promotion)>]>, + pub(crate) writer_to_reader: Arc<[Option<(usize, ResolutionInfo)>]>, /// Whether the writer schema at this site is a union pub(crate) writer_is_union: bool, /// Whether the reader schema at this site is a union @@ -615,7 +621,7 @@ impl<'a> TryFrom<&Schema<'a>> for AvroField { fn try_from(schema: &Schema<'a>) -> Result<Self, Self::Error> { match schema { Schema::Complex(ComplexType::Record(r)) => { - let mut resolver = Maker::new(false, false); + let mut resolver = Maker::new(false, false, Tz::default()); let data_type = resolver.make_data_type(schema, None, None)?; Ok(AvroField { data_type, @@ -636,6 +642,7 @@ pub(crate) struct AvroFieldBuilder<'a> { reader_schema: Option<&'a Schema<'a>>, use_utf8view: bool, strict_mode: bool, + tz: Tz, } impl<'a> AvroFieldBuilder<'a> { @@ -646,6 +653,7 @@ impl<'a> AvroFieldBuilder<'a> { reader_schema: None, use_utf8view: false, strict_mode: false, + tz: Tz::default(), } } @@ -671,11 +679,17 @@ impl<'a> AvroFieldBuilder<'a> { self } + /// Sets the timezone representation for timestamps. + pub(crate) fn with_tz(mut self, tz: Tz) -> Self { + self.tz = tz; + self + } + /// Build an [`AvroField`] from the builder pub(crate) fn build(self) -> Result<AvroField, ArrowError> { match self.writer_schema { Schema::Complex(ComplexType::Record(r)) => { - let mut resolver = Maker::new(self.use_utf8view, self.strict_mode); + let mut resolver = Maker::new(self.use_utf8view, self.strict_mode, self.tz); let data_type = resolver.make_data_type(self.writer_schema, self.reader_schema, None)?; Ok(AvroField { @@ -691,6 +705,36 @@ impl<'a> AvroFieldBuilder<'a> { } } +/// Timezone representation for timestamps. +/// +/// Avro only distinguishes between UTC and local time (no timezone), but Arrow supports +/// either of two identifiers for the UTC timezone: "+00:00" and "UTC". +/// The data types using these time zone IDs behave identically, but are not logically equal.
+#[derive(Debug, Copy, Clone, PartialEq, Default)] +pub enum Tz { + /// Represent Avro `timestamp-*` logical types with "+00:00" timezone ID + #[default] + OffsetZero, + /// Represent Avro `timestamp-*` logical types with "UTC" timezone ID + Utc, +} + +impl Tz { + /// Returns the string identifier for this timezone representation + pub fn as_str(&self) -> &'static str { + match self { + Self::OffsetZero => "+00:00", + Self::Utc => "UTC", + } + } +} + +impl Display for Tz { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str(self.as_str()) + } +} + /// An Avro encoding /// /// @@ -726,18 +770,18 @@ pub(crate) enum Codec { /// Represents Avro timestamp-millis or local-timestamp-millis logical type /// /// Maps to Arrow's Timestamp(TimeUnit::Millisecond) data type - /// The boolean parameter indicates whether the timestamp has a UTC timezone (true) or is local time (false) - TimestampMillis(bool), + /// The parameter indicates whether the timestamp has a UTC timezone (Some) or is local time (None) + TimestampMillis(Option), /// Represents Avro timestamp-micros or local-timestamp-micros logical type /// /// Maps to Arrow's Timestamp(TimeUnit::Microsecond) data type - /// The boolean parameter indicates whether the timestamp has a UTC timezone (true) or is local time (false) - TimestampMicros(bool), + /// The parameter indicates whether the timestamp has a UTC timezone (Some) or is local time (None) + TimestampMicros(Option), /// Represents Avro timestamp-nanos or local-timestamp-nanos logical type /// /// Maps to Arrow's Timestamp(TimeUnit::Nanosecond) data type - /// The boolean parameter indicates whether the timestamp has a UTC timezone (true) or is local time (false) - TimestampNanos(bool), + /// The parameter indicates whether the timestamp has a UTC timezone (Some) or is local time (None) + TimestampNanos(Option), /// Represents Avro fixed type, maps to Arrow's FixedSizeBinary data type /// The i32 parameter indicates the fixed binary size Fixed(i32), @@ -838,15 +882,18 @@ impl Codec { Self::Date32 => DataType::Date32, Self::TimeMillis => DataType::Time32(TimeUnit::Millisecond), Self::TimeMicros => DataType::Time64(TimeUnit::Microsecond), - Self::TimestampMillis(is_utc) => { - DataType::Timestamp(TimeUnit::Millisecond, is_utc.then(|| "+00:00".into())) - } - Self::TimestampMicros(is_utc) => { - DataType::Timestamp(TimeUnit::Microsecond, is_utc.then(|| "+00:00".into())) - } - Self::TimestampNanos(is_utc) => { - DataType::Timestamp(TimeUnit::Nanosecond, is_utc.then(|| "+00:00".into())) - } + Self::TimestampMillis(tz) => DataType::Timestamp( + TimeUnit::Millisecond, + tz.as_ref().map(|tz| tz.as_str().into()), + ), + Self::TimestampMicros(tz) => DataType::Timestamp( + TimeUnit::Microsecond, + tz.as_ref().map(|tz| tz.as_str().into()), + ), + Self::TimestampNanos(tz) => DataType::Timestamp( + TimeUnit::Nanosecond, + tz.as_ref().map(|tz| tz.as_str().into()), + ), Self::Interval => DataType::Interval(IntervalUnit::MonthDayNano), Self::Fixed(size) => DataType::FixedSizeBinary(*size), Self::Decimal(precision, scale, _size) => { @@ -1106,12 +1153,15 @@ impl From<&Codec> for UnionFieldKind { Codec::Date32 => Self::Date, Codec::TimeMillis => Self::TimeMillis, Codec::TimeMicros => Self::TimeMicros, - Codec::TimestampMillis(true) => Self::TimestampMillisUtc, - Codec::TimestampMillis(false) => Self::TimestampMillisLocal, - Codec::TimestampMicros(true) => Self::TimestampMicrosUtc, - Codec::TimestampMicros(false) => Self::TimestampMicrosLocal, - Codec::TimestampNanos(true) => 
Self::TimestampNanosUtc, - Codec::TimestampNanos(false) => Self::TimestampNanosLocal, + Codec::TimestampMillis(Some(Tz::OffsetZero)) => Self::TimestampMillisUtc, + Codec::TimestampMillis(Some(Tz::Utc)) => Self::TimestampMillisUtc, + Codec::TimestampMillis(None) => Self::TimestampMillisLocal, + Codec::TimestampMicros(Some(Tz::OffsetZero)) => Self::TimestampMicrosUtc, + Codec::TimestampMicros(Some(Tz::Utc)) => Self::TimestampMicrosUtc, + Codec::TimestampMicros(None) => Self::TimestampMicrosLocal, + Codec::TimestampNanos(Some(Tz::OffsetZero)) => Self::TimestampNanosUtc, + Codec::TimestampNanos(Some(Tz::Utc)) => Self::TimestampNanosUtc, + Codec::TimestampNanos(None) => Self::TimestampNanosLocal, Codec::Interval => Self::Duration, Codec::Fixed(_) => Self::Fixed, Codec::Decimal(..) => Self::Decimal, @@ -1332,14 +1382,16 @@ struct Maker<'a> { resolver: Resolver<'a>, use_utf8view: bool, strict_mode: bool, + tz: Tz, } impl<'a> Maker<'a> { - fn new(use_utf8view: bool, strict_mode: bool) -> Self { + fn new(use_utf8view: bool, strict_mode: bool, tz: Tz) -> Self { Self { resolver: Default::default(), use_utf8view, strict_mode, + tz, } } @@ -1603,20 +1655,22 @@ impl<'a> Maker<'a> { (Some("time-millis"), c @ Codec::Int32) => *c = Codec::TimeMillis, (Some("time-micros"), c @ Codec::Int64) => *c = Codec::TimeMicros, (Some("timestamp-millis"), c @ Codec::Int64) => { - *c = Codec::TimestampMillis(true) + *c = Codec::TimestampMillis(Some(self.tz)) } (Some("timestamp-micros"), c @ Codec::Int64) => { - *c = Codec::TimestampMicros(true) + *c = Codec::TimestampMicros(Some(self.tz)) } (Some("local-timestamp-millis"), c @ Codec::Int64) => { - *c = Codec::TimestampMillis(false) + *c = Codec::TimestampMillis(None) } (Some("local-timestamp-micros"), c @ Codec::Int64) => { - *c = Codec::TimestampMicros(false) + *c = Codec::TimestampMicros(None) + } + (Some("timestamp-nanos"), c @ Codec::Int64) => { + *c = Codec::TimestampNanos(Some(self.tz)) } - (Some("timestamp-nanos"), c @ Codec::Int64) => *c = Codec::TimestampNanos(true), (Some("local-timestamp-nanos"), c @ Codec::Int64) => { - *c = Codec::TimestampNanos(false) + *c = Codec::TimestampNanos(None) } (Some("uuid"), c @ Codec::Utf8) => { // Map Avro string+logicalType=uuid into the UUID Codec, @@ -1715,7 +1769,7 @@ impl<'a> Maker<'a> { .and_then(|v| v.as_str()) { if unit == "nanosecond" { - field.codec = Codec::TimestampNanos(false); + field.codec = Codec::TimestampNanos(Some(self.tz)); } } } @@ -1748,9 +1802,21 @@ impl<'a> Maker<'a> { nullable_union_variants(writer_variants), nullable_union_variants(reader_variants), ) { - (Some((w_nb, w_nonnull)), Some((_r_nb, r_nonnull))) => { - let mut dt = self.make_data_type(w_nonnull, Some(r_nonnull), namespace)?; + (Some((w_nb, w_nonnull)), Some((r_nb, r_nonnull))) => { + let mut dt = self.resolve_type(w_nonnull, r_nonnull, namespace)?; + let mut writer_to_reader = vec![None, None]; + writer_to_reader[w_nb.non_null_index()] = Some(( + r_nb.non_null_index(), + dt.resolution + .take() + .unwrap_or(ResolutionInfo::Promotion(Promotion::Direct)), + )); dt.nullability = Some(w_nb); + dt.resolution = Some(ResolutionInfo::Union(ResolvedUnion { + writer_to_reader: Arc::from(writer_to_reader), + writer_is_union: true, + reader_is_union: true, + })); #[cfg(feature = "avro_custom_types")] Self::propagate_nullability_into_ree(&mut dt, w_nb); Ok(dt) @@ -1759,12 +1825,17 @@ impl<'a> Maker<'a> { } } (Schema::Union(writer_variants), reader_non_union) => { - let writer_to_reader: Vec> = writer_variants + let writer_to_reader: Vec> = 
writer_variants .iter() .map(|writer| { self.resolve_type(writer, reader_non_union, namespace) .ok() - .map(|tmp| (0usize, Self::coercion_from(&tmp))) + .map(|tmp| { + let resolution = tmp + .resolution + .unwrap_or(ResolutionInfo::Promotion(Promotion::Direct)); + (0usize, resolution) + }) }) .collect(); let mut dt = self.parse_type(reader_non_union, namespace)?; @@ -1780,54 +1851,44 @@ impl<'a> Maker<'a> { nullable_union_variants(reader_variants) { let mut dt = self.resolve_type(writer_non_union, non_null_branch, namespace)?; - let non_null_idx = match nullability { - Nullability::NullFirst => 1, - Nullability::NullSecond => 0, - }; #[cfg(feature = "avro_custom_types")] Self::propagate_nullability_into_ree(&mut dt, nullability); dt.nullability = Some(nullability); - let promotion = Self::coercion_from(&dt); - dt.resolution = Some(ResolutionInfo::Union(ResolvedUnion { - writer_to_reader: Arc::from(vec![Some((non_null_idx, promotion))]), - writer_is_union: false, - reader_is_union: true, - })); + // Ensure resolution is set to a non-Union variant to suppress + // reading the union tag which is the default behavior. + if dt.resolution.is_none() { + dt.resolution = Some(ResolutionInfo::Promotion(Promotion::Direct)); + } Ok(dt) } else { - let mut best_match: Option<(usize, AvroDataType, Promotion)> = None; - for (i, variant) in reader_variants.iter().enumerate() { - if let Ok(resolved_dt) = - self.resolve_type(writer_non_union, variant, namespace) - { - let promotion = Self::coercion_from(&resolved_dt); - if promotion == Promotion::Direct { - best_match = Some((i, resolved_dt, promotion)); - break; - } else if best_match.is_none() { - best_match = Some((i, resolved_dt, promotion)); - } - } - } - let Some((match_idx, match_dt, promotion)) = best_match else { + let Some((match_idx, mut match_dt)) = + self.find_best_union_match(writer_non_union, reader_variants, namespace) + else { return Err(ArrowError::SchemaError( "Writer schema does not match any reader union branch".to_string(), )); }; - let mut children = Vec::with_capacity(reader_variants.len()); + // Steal the resolution info from the matching reader branch + // for the Union resolution, but preserve possible resolution + // information on its inner types. + // For other branches, resolution is irrelevant, + // so just parse them. 
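+            // For example, when the matching branch is a record that gained a
+            // defaulted field, its `ResolutionInfo::Record` moves into the union's
+            // writer_to_reader entry while the per-field `DefaultValue` resolution
+            // stays on the child data type (see the tests below).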
+            let resolution = match_dt
+                .resolution
+                .take()
+                .unwrap_or(ResolutionInfo::Promotion(Promotion::Direct));
             let mut match_dt = Some(match_dt);
-            for (i, variant) in reader_variants.iter().enumerate() {
-                if i == match_idx {
-                    if let Some(mut dt) = match_dt.take() {
-                        if matches!(dt.resolution, Some(ResolutionInfo::Promotion(_))) {
-                            dt.resolution = None;
-                        }
-                        children.push(dt);
+            let children = reader_variants
+                .iter()
+                .enumerate()
+                .map(|(idx, variant)| {
+                    if idx == match_idx {
+                        Ok(match_dt.take().unwrap())
+                    } else {
+                        self.parse_type(variant, namespace)
                     }
-                } else {
-                    children.push(self.parse_type(variant, namespace)?);
-                }
-            }
+                })
+                .collect::<Result<Vec<_>, _>>()?;
             let union_fields = build_union_fields(&children)?;
             let mut dt = AvroDataType::new(
                 Codec::Union(children.into(), union_fields, UnionMode::Dense),
@@ -1835,7 +1896,7 @@
                 None,
             );
             dt.resolution = Some(ResolutionInfo::Union(ResolvedUnion {
-                writer_to_reader: Arc::from(vec![Some((match_idx, promotion))]),
+                writer_to_reader: Arc::from(vec![Some((match_idx, resolution))]),
                 writer_is_union: false,
                 reader_is_union: true,
             }));
@@ -1870,34 +1931,30 @@
         }
     }

-    #[inline]
-    fn coercion_from(dt: &AvroDataType) -> Promotion {
-        match dt.resolution.as_ref() {
-            Some(ResolutionInfo::Promotion(promotion)) => *promotion,
-            _ => Promotion::Direct,
-        }
-    }
-
-    fn find_best_promotion(
+    fn find_best_union_match(
         &mut self,
         writer: &Schema<'a>,
         reader_variants: &[Schema<'a>],
         namespace: Option<&'a str>,
-    ) -> Option<(usize, Promotion)> {
-        let mut first_promotion: Option<(usize, Promotion)> = None;
+    ) -> Option<(usize, AvroDataType)> {
+        let mut first_resolution = None;
         for (reader_index, reader) in reader_variants.iter().enumerate() {
-            if let Ok(tmp) = self.resolve_type(writer, reader, namespace) {
-                let promotion = Self::coercion_from(&tmp);
-                if promotion == Promotion::Direct {
-                    // An exact match is best, return immediately.
-                    return Some((reader_index, promotion));
-                } else if first_promotion.is_none() {
-                    // Store the first valid promotion but keep searching for a direct match.
-                    first_promotion = Some((reader_index, promotion));
-                }
+            if let Ok(dt) = self.resolve_type(writer, reader, namespace) {
+                match &dt.resolution {
+                    None | Some(ResolutionInfo::Promotion(Promotion::Direct)) => {
+                        // An exact match is best, return immediately.
+                        return Some((reader_index, dt));
+                    }
+                    Some(_) => {
+                        if first_resolution.is_none() {
+                            // Store the first valid resolution but keep searching for a direct match.
+                            first_resolution = Some((reader_index, dt));
+                        }
+                    }
+                };
             }
         }
-        first_promotion
+        first_resolution
     }

     fn resolve_unions<'s>(
@@ -1906,15 +1963,34 @@
         writer_variants: &'s [Schema<'a>],
         reader_variants: &'s [Schema<'a>],
         namespace: Option<&'a str>,
     ) -> Result<AvroDataType, ArrowError> {
+        let mut resolved_reader_encodings = HashMap::new();
+        let writer_to_reader: Vec<Option<(usize, ResolutionInfo)>> = writer_variants
+            .iter()
+            .map(|writer| {
+                self.find_best_union_match(writer, reader_variants, namespace)
+                    .map(|(match_idx, mut match_dt)| {
+                        let resolution = match_dt
+                            .resolution
+                            .take()
+                            .unwrap_or(ResolutionInfo::Promotion(Promotion::Direct));
+                        // TODO: check for overlapping reader variants?
+                        // They should not be possible in a valid schema.
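+                        // Note: if two writer branches were ever to resolve to the
+                        // same reader branch, the later insert below would overwrite
+                        // the earlier entry in `resolved_reader_encodings`.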
+ resolved_reader_encodings.insert(match_idx, match_dt); + (match_idx, resolution) + }) + }) + .collect(); let reader_encodings: Vec = reader_variants .iter() - .map(|reader_schema| self.parse_type(reader_schema, namespace)) + .enumerate() + .map(|(reader_idx, reader_schema)| { + if let Some(resolved) = resolved_reader_encodings.remove(&reader_idx) { + Ok(resolved) + } else { + self.parse_type(reader_schema, namespace) + } + }) .collect::>()?; - let mut writer_to_reader: Vec> = - Vec::with_capacity(writer_variants.len()); - for writer in writer_variants { - writer_to_reader.push(self.find_best_promotion(writer, reader_variants, namespace)); - } let union_fields = build_union_fields(&reader_encodings)?; let mut dt = AvroDataType::new( Codec::Union(reader_encodings.into(), union_fields, UnionMode::Dense), @@ -2179,7 +2255,14 @@ impl<'a> Maker<'a> { )?; let writer_ns = writer_record.namespace.or(namespace); let reader_ns = reader_record.namespace.or(namespace); - let reader_md = reader_record.attributes.field_metadata(); + let mut reader_md = reader_record.attributes.field_metadata(); + reader_md.insert( + AVRO_NAME_METADATA_KEY.to_string(), + reader_record.name.to_string(), + ); + if let Some(ns) = reader_ns { + reader_md.insert(AVRO_NAMESPACE_METADATA_KEY.to_string(), ns.to_string()); + } // Build writer lookup and ambiguous alias set. let (writer_lookup, ambiguous_writer_aliases) = Self::build_writer_lookup(writer_record); let mut writer_to_reader: Vec> = vec![None; writer_record.fields.len()]; @@ -2252,24 +2335,27 @@ impl<'a> Maker<'a> { data_type: dt, }); } - // Build skip_fields in writer order; pre-size and push. - let mut skip_fields: Vec> = - Vec::with_capacity(writer_record.fields.len()); - for (writer_index, writer_field) in writer_record.fields.iter().enumerate() { - if writer_to_reader[writer_index].is_some() { - skip_fields.push(None); - } else { - skip_fields.push(Some(self.parse_type(&writer_field.r#type, writer_ns)?)); - } - } + // Build writer field map. + let writer_fields = writer_record + .fields + .iter() + .enumerate() + .map(|(writer_index, writer_field)| { + if let Some(reader_index) = writer_to_reader[writer_index] { + Ok(ResolvedField::ToReader(reader_index)) + } else { + let dt = self.parse_type(&writer_field.r#type, writer_ns)?; + Ok(ResolvedField::Skip(dt)) + } + }) + .collect::>()?; let resolved = AvroDataType::new_with_resolution( Codec::Struct(Arc::from(reader_fields)), reader_md, None, Some(ResolutionInfo::Record(ResolvedRecord { - writer_to_reader: Arc::from(writer_to_reader), + writer_fields, default_fields: Arc::from(default_fields), - skip_fields: Arc::from(skip_fields), })), ); // Register a resolved record by reader name+namespace for potential named type refs. 
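+        // In sketch form, the `ResolvedField` entries built above drive decoding:
+        //   writer_fields[i] == ToReader(j) => decode writer field i into reader column j
+        //   writer_fields[i] == Skip(dt)    => decode writer field i as `dt`, then discard it
+        // while `default_fields` lists reader columns to fill from their Avro defaults.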
@@ -2307,7 +2393,7 @@ mod tests { fn resolve_promotion(writer: PrimitiveType, reader: PrimitiveType) -> AvroDataType { let writer_schema = Schema::TypeName(TypeName::Primitive(writer)); let reader_schema = Schema::TypeName(TypeName::Primitive(reader)); - let mut maker = Maker::new(false, false); + let mut maker = Maker::new(false, false, Tz::default()); maker .make_data_type(&writer_schema, Some(&reader_schema), None) .expect("promotion should resolve") @@ -2324,7 +2410,7 @@ mod tests { fn test_date_logical_type() { let schema = create_schema_with_logical_type(PrimitiveType::Int, "date"); - let mut maker = Maker::new(false, false); + let mut maker = Maker::new(false, false, Tz::default()); let result = maker.make_data_type(&schema, None, None).unwrap(); assert!(matches!(result.codec, Codec::Date32)); @@ -2334,7 +2420,7 @@ mod tests { fn test_time_millis_logical_type() { let schema = create_schema_with_logical_type(PrimitiveType::Int, "time-millis"); - let mut maker = Maker::new(false, false); + let mut maker = Maker::new(false, false, Tz::default()); let result = maker.make_data_type(&schema, None, None).unwrap(); assert!(matches!(result.codec, Codec::TimeMillis)); @@ -2344,7 +2430,7 @@ mod tests { fn test_time_micros_logical_type() { let schema = create_schema_with_logical_type(PrimitiveType::Long, "time-micros"); - let mut maker = Maker::new(false, false); + let mut maker = Maker::new(false, false, Tz::default()); let result = maker.make_data_type(&schema, None, None).unwrap(); assert!(matches!(result.codec, Codec::TimeMicros)); @@ -2352,42 +2438,77 @@ mod tests { #[test] fn test_timestamp_millis_logical_type() { - let schema = create_schema_with_logical_type(PrimitiveType::Long, "timestamp-millis"); + for tz in [Tz::OffsetZero, Tz::Utc] { + let schema = create_schema_with_logical_type(PrimitiveType::Long, "timestamp-millis"); - let mut maker = Maker::new(false, false); - let result = maker.make_data_type(&schema, None, None).unwrap(); + let mut maker = Maker::new(false, false, tz); + let result = maker.make_data_type(&schema, None, None).unwrap(); - assert!(matches!(result.codec, Codec::TimestampMillis(true))); + let Codec::TimestampMillis(Some(actual_tz)) = result.codec else { + panic!("Expected TimestampMillis codec"); + }; + assert_eq!(actual_tz, tz); + } } #[test] fn test_timestamp_micros_logical_type() { - let schema = create_schema_with_logical_type(PrimitiveType::Long, "timestamp-micros"); + for tz in [Tz::OffsetZero, Tz::Utc] { + let schema = create_schema_with_logical_type(PrimitiveType::Long, "timestamp-micros"); - let mut maker = Maker::new(false, false); - let result = maker.make_data_type(&schema, None, None).unwrap(); + let mut maker = Maker::new(false, false, tz); + let result = maker.make_data_type(&schema, None, None).unwrap(); + + let Codec::TimestampMicros(Some(actual_tz)) = result.codec else { + panic!("Expected TimestampMicros codec"); + }; + assert_eq!(actual_tz, tz); + } + } - assert!(matches!(result.codec, Codec::TimestampMicros(true))); + #[test] + fn test_timestamp_nanos_logical_type() { + for tz in [Tz::OffsetZero, Tz::Utc] { + let schema = create_schema_with_logical_type(PrimitiveType::Long, "timestamp-nanos"); + + let mut maker = Maker::new(false, false, tz); + let result = maker.make_data_type(&schema, None, None).unwrap(); + + let Codec::TimestampNanos(Some(actual_tz)) = result.codec else { + panic!("Expected TimestampNanos codec"); + }; + assert_eq!(actual_tz, tz); + } } #[test] fn test_local_timestamp_millis_logical_type() { let schema = 
create_schema_with_logical_type(PrimitiveType::Long, "local-timestamp-millis"); - let mut maker = Maker::new(false, false); + let mut maker = Maker::new(false, false, Tz::default()); let result = maker.make_data_type(&schema, None, None).unwrap(); - assert!(matches!(result.codec, Codec::TimestampMillis(false))); + assert!(matches!(result.codec, Codec::TimestampMillis(None))); } #[test] fn test_local_timestamp_micros_logical_type() { let schema = create_schema_with_logical_type(PrimitiveType::Long, "local-timestamp-micros"); - let mut maker = Maker::new(false, false); + let mut maker = Maker::new(false, false, Tz::default()); let result = maker.make_data_type(&schema, None, None).unwrap(); - assert!(matches!(result.codec, Codec::TimestampMicros(false))); + assert!(matches!(result.codec, Codec::TimestampMicros(None))); + } + + #[test] + fn test_local_timestamp_nanos_logical_type() { + let schema = create_schema_with_logical_type(PrimitiveType::Long, "local-timestamp-nanos"); + + let mut maker = Maker::new(false, false, Tz::default()); + let result = maker.make_data_type(&schema, None, None).unwrap(); + + assert!(matches!(result.codec, Codec::TimestampNanos(None))); } #[test] @@ -2436,7 +2557,7 @@ mod tests { fn test_unknown_logical_type_added_to_metadata() { let schema = create_schema_with_logical_type(PrimitiveType::Int, "custom-type"); - let mut maker = Maker::new(false, false); + let mut maker = Maker::new(false, false, Tz::default()); let result = maker.make_data_type(&schema, None, None).unwrap(); assert_eq!( @@ -2449,7 +2570,7 @@ mod tests { fn test_string_with_utf8view_enabled() { let schema = Schema::TypeName(TypeName::Primitive(PrimitiveType::String)); - let mut maker = Maker::new(true, false); + let mut maker = Maker::new(true, false, Tz::default()); let result = maker.make_data_type(&schema, None, None).unwrap(); assert!(matches!(result.codec, Codec::Utf8View)); @@ -2459,7 +2580,7 @@ mod tests { fn test_string_without_utf8view_enabled() { let schema = Schema::TypeName(TypeName::Primitive(PrimitiveType::String)); - let mut maker = Maker::new(false, false); + let mut maker = Maker::new(false, false, Tz::default()); let result = maker.make_data_type(&schema, None, None).unwrap(); assert!(matches!(result.codec, Codec::Utf8)); @@ -2488,7 +2609,7 @@ mod tests { let schema = Schema::Complex(ComplexType::Record(record)); - let mut maker = Maker::new(true, false); + let mut maker = Maker::new(true, false, Tz::default()); let result = maker.make_data_type(&schema, None, None).unwrap(); if let Codec::Struct(fields) = &result.codec { @@ -2506,7 +2627,7 @@ mod tests { Schema::TypeName(TypeName::Primitive(PrimitiveType::Null)), ]); - let mut maker = Maker::new(false, true); + let mut maker = Maker::new(false, true, Tz::default()); let result = maker.make_data_type(&schema, None, None); assert!(result.is_err()); @@ -2594,7 +2715,7 @@ mod tests { fn test_resolve_illegal_promotion_double_to_float_errors() { let writer_schema = Schema::TypeName(TypeName::Primitive(PrimitiveType::Double)); let reader_schema = Schema::TypeName(TypeName::Primitive(PrimitiveType::Float)); - let mut maker = Maker::new(false, false); + let mut maker = Maker::new(false, false, Tz::default()); let result = maker.make_data_type(&writer_schema, Some(&reader_schema), None); assert!(result.is_err()); match result { @@ -2615,12 +2736,20 @@ mod tests { Schema::TypeName(TypeName::Primitive(PrimitiveType::Double)), Schema::TypeName(TypeName::Primitive(PrimitiveType::Null)), ]); - let mut maker = Maker::new(false, false); + let mut 
maker = Maker::new(false, false, Tz::default()); let result = maker.make_data_type(&writer, Some(&reader), None).unwrap(); assert!(matches!(result.codec, Codec::Float64)); assert_eq!( result.resolution, - Some(ResolutionInfo::Promotion(Promotion::IntToDouble)) + Some(ResolutionInfo::Union(ResolvedUnion { + writer_to_reader: [ + None, + Some((0, ResolutionInfo::Promotion(Promotion::IntToDouble))) + ] + .into(), + writer_is_union: true, + reader_is_union: true, + })) ); assert_eq!(result.nullability, Some(Nullability::NullFirst)); } @@ -2632,7 +2761,7 @@ mod tests { mk_primitive(PrimitiveType::Long), ]); let reader = mk_primitive(PrimitiveType::Bytes); - let mut maker = Maker::new(false, false); + let mut maker = Maker::new(false, false, Tz::default()); let dt = maker.make_data_type(&writer, Some(&reader), None).unwrap(); assert!(matches!(dt.codec(), Codec::Binary)); let resolved = match dt.resolution { @@ -2642,7 +2771,10 @@ mod tests { assert!(resolved.writer_is_union && !resolved.reader_is_union); assert_eq!( resolved.writer_to_reader.as_ref(), - &[Some((0, Promotion::StringToBytes)), None] + &[ + Some((0, ResolutionInfo::Promotion(Promotion::StringToBytes))), + None + ] ); } @@ -2653,7 +2785,7 @@ mod tests { mk_primitive(PrimitiveType::Long), mk_primitive(PrimitiveType::Double), ]); - let mut maker = Maker::new(false, false); + let mut maker = Maker::new(false, false, Tz::default()); let dt = maker.make_data_type(&writer, Some(&reader), None).unwrap(); let resolved = match dt.resolution { Some(ResolutionInfo::Union(u)) => u, @@ -2662,7 +2794,7 @@ mod tests { assert!(!resolved.writer_is_union && resolved.reader_is_union); assert_eq!( resolved.writer_to_reader.as_ref(), - &[Some((0, Promotion::Direct))] + &[Some((0, ResolutionInfo::Promotion(Promotion::Direct)))] ); } @@ -2674,7 +2806,7 @@ mod tests { mk_primitive(PrimitiveType::Long), mk_primitive(PrimitiveType::String), ]); - let mut maker = Maker::new(false, false); + let mut maker = Maker::new(false, false, Tz::default()); let dt = maker.make_data_type(&writer, Some(&reader), None).unwrap(); let resolved = match dt.resolution { Some(ResolutionInfo::Union(u)) => u, @@ -2682,7 +2814,194 @@ mod tests { }; assert_eq!( resolved.writer_to_reader.as_ref(), - &[Some((1, Promotion::IntToLong))] + &[Some((1, ResolutionInfo::Promotion(Promotion::IntToLong)))] + ); + } + + #[test] + fn test_resolve_writer_non_union_to_reader_union_preserves_inner_record_defaults() { + // Writer: record Inner{a: int} + // Reader: union [Inner{a: int, b: int default 42}, string] + // The matching child (Inner) should preserve DefaultValue(Int(42)) on field b. 
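+        // Expected shape, in sketch form (asserted in detail below):
+        //   writer_to_reader == [Some((0, ResolutionInfo::Record(..)))]
+        // where the record resolution carries writer_fields == [ToReader(0)]
+        // and default_fields == [1] for the added field `b`.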
+ let writer = Schema::Complex(ComplexType::Record(Record { + name: "Inner", + namespace: None, + doc: None, + aliases: vec![], + fields: vec![AvroFieldSchema { + name: "a", + doc: None, + r#type: mk_primitive(PrimitiveType::Int), + default: None, + aliases: vec![], + }], + attributes: Attributes::default(), + })); + let reader = mk_union(vec![ + Schema::Complex(ComplexType::Record(Record { + name: "Inner", + namespace: None, + doc: None, + aliases: vec![], + fields: vec![ + AvroFieldSchema { + name: "a", + doc: None, + r#type: mk_primitive(PrimitiveType::Int), + default: None, + aliases: vec![], + }, + AvroFieldSchema { + name: "b", + doc: None, + r#type: mk_primitive(PrimitiveType::Int), + default: Some(Value::Number(serde_json::Number::from(42))), + aliases: vec![], + }, + ], + attributes: Attributes::default(), + })), + mk_primitive(PrimitiveType::String), + ]); + let mut maker = Maker::new(false, false, Default::default()); + let dt = maker + .make_data_type(&writer, Some(&reader), None) + .expect("resolution should succeed"); + // Verify the union resolution structure + let resolved = match dt.resolution.as_ref() { + Some(ResolutionInfo::Union(u)) => u, + other => panic!("expected union resolution info, got {other:?}"), + }; + assert!(!resolved.writer_is_union && resolved.reader_is_union); + assert_eq!( + resolved.writer_to_reader.len(), + 1, + "expected the non-union record to resolve to a union variant" + ); + let resolution = match resolved.writer_to_reader.first().unwrap() { + Some((0, resolution)) => resolution, + other => panic!("unexpected writer-to-reader table value {other:?}"), + }; + match resolution { + ResolutionInfo::Record(ResolvedRecord { + writer_fields, + default_fields, + }) => { + assert_eq!(writer_fields.len(), 1); + assert_eq!(writer_fields[0], ResolvedField::ToReader(0)); + assert_eq!(default_fields.len(), 1); + assert_eq!(default_fields[0], 1); + } + other => panic!("unexpected resolution {other:?}"), + } + // The matching child (Inner at index 0) should have field b with DefaultValue + let children = match dt.codec() { + Codec::Union(children, _, _) => children, + other => panic!("expected union codec, got {other:?}"), + }; + let inner_fields = match children[0].codec() { + Codec::Struct(f) => f, + other => panic!("expected struct codec for Inner, got {other:?}"), + }; + assert_eq!(inner_fields.len(), 2); + assert_eq!(inner_fields[1].name(), "b"); + assert_eq!( + inner_fields[1].data_type().resolution, + Some(ResolutionInfo::DefaultValue(AvroLiteral::Int(42))), + "field b should have DefaultValue(Int(42)) from schema resolution" + ); + } + + #[test] + fn test_resolve_writer_union_to_reader_union_preserves_inner_record_defaults() { + // Writer: record [string, Inner{a: int}] + // Reader: union [Inner{a: int, b: int default 42}, string] + // The matching child (Inner) should preserve DefaultValue(Int(42)) on field b. 
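+        // Expected shape, in sketch form: writer_to_reader has one entry per writer
+        // branch; the Inner branch (writer index 1) maps to reader branch 0 carrying
+        // the record resolution, while the string branch maps directly to its reader
+        // counterpart.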
+ let writer = mk_union(vec![ + mk_primitive(PrimitiveType::String), + Schema::Complex(ComplexType::Record(Record { + name: "Inner", + namespace: None, + doc: None, + aliases: vec![], + fields: vec![AvroFieldSchema { + name: "a", + doc: None, + r#type: mk_primitive(PrimitiveType::Int), + default: None, + aliases: vec![], + }], + attributes: Attributes::default(), + })), + ]); + let reader = mk_union(vec![ + Schema::Complex(ComplexType::Record(Record { + name: "Inner", + namespace: None, + doc: None, + aliases: vec![], + fields: vec![ + AvroFieldSchema { + name: "a", + doc: None, + r#type: mk_primitive(PrimitiveType::Int), + default: None, + aliases: vec![], + }, + AvroFieldSchema { + name: "b", + doc: None, + r#type: mk_primitive(PrimitiveType::Int), + default: Some(Value::Number(serde_json::Number::from(42))), + aliases: vec![], + }, + ], + attributes: Attributes::default(), + })), + mk_primitive(PrimitiveType::String), + ]); + let mut maker = Maker::new(false, false, Default::default()); + let dt = maker + .make_data_type(&writer, Some(&reader), None) + .expect("resolution should succeed"); + // Verify the union resolution structure + let resolved = match dt.resolution.as_ref() { + Some(ResolutionInfo::Union(u)) => u, + other => panic!("expected union resolution info, got {other:?}"), + }; + assert!(resolved.writer_is_union && resolved.reader_is_union); + assert_eq!(resolved.writer_to_reader.len(), 2); + let resolution = match resolved.writer_to_reader[1].as_ref() { + Some((0, resolution)) => resolution, + other => panic!("unexpected writer-to-reader table value {other:?}"), + }; + match resolution { + ResolutionInfo::Record(ResolvedRecord { + writer_fields, + default_fields, + }) => { + assert_eq!(writer_fields.len(), 1); + assert_eq!(writer_fields[0], ResolvedField::ToReader(0)); + assert_eq!(default_fields.len(), 1); + assert_eq!(default_fields[0], 1); + } + other => panic!("unexpected resolution {other:?}"), + } + // The matching child (Inner at index 0) should have field b with DefaultValue + let children = match dt.codec() { + Codec::Union(children, _, _) => children, + other => panic!("expected union codec, got {other:?}"), + }; + let inner_fields = match children[0].codec() { + Codec::Struct(f) => f, + other => panic!("expected struct codec for Inner, got {other:?}"), + }; + assert_eq!(inner_fields.len(), 2); + assert_eq!(inner_fields[1].name(), "b"); + assert_eq!( + inner_fields[1].data_type().resolution, + Some(ResolutionInfo::DefaultValue(AvroLiteral::Int(42))), + "field b should have DefaultValue(Int(42)) from schema resolution" ); } @@ -2696,11 +3015,22 @@ mod tests { mk_primitive(PrimitiveType::String), mk_primitive(PrimitiveType::Null), ]); - let mut maker = Maker::new(false, false); + let mut maker = Maker::new(false, false, Tz::default()); let dt = maker.make_data_type(&writer, Some(&reader), None).unwrap(); assert!(matches!(dt.codec(), Codec::Utf8)); assert_eq!(dt.nullability, Some(Nullability::NullFirst)); - assert!(dt.resolution.is_none()); + assert_eq!( + dt.resolution, + Some(ResolutionInfo::Union(ResolvedUnion { + writer_to_reader: [ + None, + Some((0, ResolutionInfo::Promotion(Promotion::Direct))) + ] + .into(), + writer_is_union: true, + reader_is_union: true + })) + ); } #[test] @@ -2713,13 +3043,21 @@ mod tests { mk_primitive(PrimitiveType::Double), mk_primitive(PrimitiveType::Null), ]); - let mut maker = Maker::new(false, false); + let mut maker = Maker::new(false, false, Tz::default()); let dt = maker.make_data_type(&writer, Some(&reader), None).unwrap(); 
assert!(matches!(dt.codec(), Codec::Float64)); assert_eq!(dt.nullability, Some(Nullability::NullFirst)); assert_eq!( dt.resolution, - Some(ResolutionInfo::Promotion(Promotion::IntToDouble)) + Some(ResolutionInfo::Union(ResolvedUnion { + writer_to_reader: [ + None, + Some((0, ResolutionInfo::Promotion(Promotion::IntToDouble))) + ] + .into(), + writer_is_union: true, + reader_is_union: true + })) ); } @@ -2727,7 +3065,7 @@ mod tests { fn test_resolve_type_promotion() { let writer_schema = Schema::TypeName(TypeName::Primitive(PrimitiveType::Int)); let reader_schema = Schema::TypeName(TypeName::Primitive(PrimitiveType::Long)); - let mut maker = Maker::new(false, false); + let mut maker = Maker::new(false, false, Tz::default()); let result = maker .make_data_type(&writer_schema, Some(&reader_schema), None) .unwrap(); @@ -2764,7 +3102,7 @@ mod tests { let schema: Schema = serde_json::from_str(schema_str).unwrap(); - let mut maker = Maker::new(false, false); + let mut maker = Maker::new(false, false, Tz::default()); let avro_data_type = maker.make_data_type(&schema, None, None).unwrap(); if let Codec::Struct(fields) = avro_data_type.codec() { @@ -2844,7 +3182,7 @@ mod tests { let schema: Schema = serde_json::from_str(schema_str).unwrap(); - let mut maker = Maker::new(false, false); + let mut maker = Maker::new(false, false, Tz::default()); let avro_data_type = maker.make_data_type(&schema, None, None).unwrap(); if let Codec::Struct(fields) = avro_data_type.codec() { @@ -2900,7 +3238,7 @@ mod tests { fn test_resolve_from_writer_and_reader_defaults_root_name_for_non_record_reader() { let writer_schema = Schema::TypeName(TypeName::Primitive(PrimitiveType::String)); let reader_schema = Schema::TypeName(TypeName::Primitive(PrimitiveType::String)); - let mut maker = Maker::new(false, false); + let mut maker = Maker::new(false, false, Tz::default()); let data_type = maker .make_data_type(&writer_schema, Some(&reader_schema), None) .expect("resolution should succeed"); @@ -3034,13 +3372,12 @@ mod tests { .parse_and_store_default(&serde_json::json!(1_000_000)) .unwrap(); assert_eq!(ltm, AvroLiteral::Long(1_000_000)); - let mut dt_ts_milli = AvroDataType::new(Codec::TimestampMillis(true), HashMap::new(), None); + let mut dt_ts_milli = AvroDataType::new(Codec::TimestampMillis(None), HashMap::new(), None); let l1 = dt_ts_milli .parse_and_store_default(&serde_json::json!(123)) .unwrap(); assert_eq!(l1, AvroLiteral::Long(123)); - let mut dt_ts_micro = - AvroDataType::new(Codec::TimestampMicros(false), HashMap::new(), None); + let mut dt_ts_micro = AvroDataType::new(Codec::TimestampMicros(None), HashMap::new(), None); let l2 = dt_ts_micro .parse_and_store_default(&serde_json::json!(456)) .unwrap(); @@ -3279,7 +3616,7 @@ mod tests { additional: r_add, }, })); - let mut maker = Maker::new(false, false); + let mut maker = Maker::new(false, false, Tz::default()); let dt = maker .make_data_type(&writer_schema, Some(&reader_schema), None) .unwrap(); @@ -3308,7 +3645,7 @@ mod tests { ])), attributes: Attributes::default(), })); - let mut maker = Maker::new(false, false); + let mut maker = Maker::new(false, false, Tz::default()); let dt = maker .make_data_type(&writer_schema, Some(&reader_schema), None) .unwrap(); @@ -3316,14 +3653,7 @@ mod tests { assert_eq!(inner.nullability(), Some(Nullability::NullFirst)); assert!(matches!(inner.codec(), Codec::Int32)); match inner.resolution.as_ref() { - Some(ResolutionInfo::Union(info)) => { - assert!(!info.writer_is_union, "writer should be non-union"); - 
assert!(info.reader_is_union, "reader should be union"); - assert_eq!( - info.writer_to_reader.as_ref(), - &[Some((1, Promotion::Direct))] - ); - } + Some(ResolutionInfo::Promotion(Promotion::Direct)) => {} other => panic!("expected Union resolution, got {other:?}"), } } else { @@ -3347,7 +3677,7 @@ mod tests { size: 16, attributes: Attributes::default(), })); - let mut maker = Maker::new(false, false); + let mut maker = Maker::new(false, false, Tz::default()); let dt = maker .make_data_type(&writer_schema, Some(&reader_schema), None) .unwrap(); @@ -3367,7 +3697,7 @@ mod tests { additional: Default::default(), }, })); - let mut maker = Maker::new(false, false); + let mut maker = Maker::new(false, false, Default::default()); let dt = maker.make_data_type(&schema, None, None).unwrap(); assert!(matches!(dt.codec(), Codec::IntervalMonthDayNano)); assert_eq!( @@ -3448,7 +3778,7 @@ mod tests { ], attributes: Attributes::default(), })); - let mut maker = Maker::new(false, false); + let mut maker = Maker::new(false, false, Tz::default()); let dt = maker .make_data_type(&writer, Some(&reader), None) .expect("record resolution"); @@ -3469,11 +3799,18 @@ mod tests { Some(ResolutionInfo::Record(ref r)) => r.clone(), other => panic!("expected record resolution, got {other:?}"), }; - assert_eq!(rec.writer_to_reader.as_ref(), &[Some(1), None, Some(0)]); + assert!(matches!( + &rec.writer_fields[..], + &[ + ResolvedField::ToReader(1), + ResolvedField::Skip(_), + ResolvedField::ToReader(0), + ] + )); assert_eq!(rec.default_fields.as_ref(), &[2usize, 3usize]); - assert!(rec.skip_fields[0].is_none()); - assert!(rec.skip_fields[2].is_none()); - let skip1 = rec.skip_fields[1].as_ref().expect("skip field present"); + let ResolvedField::Skip(skip1) = &rec.writer_fields[1] else { + panic!("should skip field 1") + }; assert!(matches!(skip1.codec(), Codec::Utf8)); let name_md = &fields[2].data_type().metadata; assert_eq!( @@ -3522,7 +3859,7 @@ mod tests { }; let writer_schema = Schema::Complex(ComplexType::Record(writer_record)); let reader_schema = Schema::Complex(ComplexType::Record(reader_record)); - let mut maker = Maker::new(false, false); + let mut maker = Maker::new(false, false, Tz::default()); let result = maker .make_data_type(&writer_schema, Some(&reader_schema), None) .expect("record alias resolution should succeed"); @@ -3554,7 +3891,7 @@ mod tests { }; let writer_schema = Schema::Complex(ComplexType::Enum(writer_enum)); let reader_schema = Schema::Complex(ComplexType::Enum(reader_enum)); - let mut maker = Maker::new(false, false); + let mut maker = Maker::new(false, false, Tz::default()); maker .make_data_type(&writer_schema, Some(&reader_schema), None) .expect("enum alias resolution should succeed"); @@ -3578,7 +3915,7 @@ mod tests { }; let writer_schema = Schema::Complex(ComplexType::Fixed(writer_fixed)); let reader_schema = Schema::Complex(ComplexType::Fixed(reader_fixed)); - let mut maker = Maker::new(false, false); + let mut maker = Maker::new(false, false, Tz::default()); maker .make_data_type(&writer_schema, Some(&reader_schema), None) .expect("fixed alias resolution should succeed"); diff --git a/arrow-avro/src/reader/async_reader/builder.rs b/arrow-avro/src/reader/async_reader/builder.rs index 0f9a7abf1cd4..d3cca70425de 100644 --- a/arrow-avro/src/reader/async_reader/builder.rs +++ b/arrow-avro/src/reader/async_reader/builder.rs @@ -15,13 +15,13 @@ // specific language governing permissions and limitations // under the License. 
-use crate::codec::AvroFieldBuilder;
+use crate::codec::{AvroFieldBuilder, Tz};
 use crate::errors::AvroError;
 use crate::reader::async_reader::ReaderState;
-use crate::reader::header::{Header, HeaderDecoder};
+use crate::reader::header::{Header, HeaderDecoder, HeaderInfo};
 use crate::reader::record::RecordDecoder;
 use crate::reader::{AsyncAvroFileReader, AsyncFileReader, Decoder};
-use crate::schema::{AvroSchema, FingerprintAlgorithm, SCHEMA_METADATA_KEY};
+use crate::schema::{AvroSchema, FingerprintAlgorithm};
 use indexmap::IndexMap;
 use std::ops::Range;

@@ -38,6 +38,7 @@ pub struct ReaderBuilder<R> {
     header_size_hint: Option<u64>,
     utf8_view: bool,
     strict_mode: bool,
+    tz: Tz,
 }

 impl<R: AsyncFileReader> ReaderBuilder<R> {
@@ -52,6 +53,7 @@ impl<R: AsyncFileReader> ReaderBuilder<R> {
             header_size_hint: None,
             utf8_view: false,
             strict_mode: false,
+            tz: Default::default(),
         }
     }

@@ -107,52 +109,81 @@ impl<R: AsyncFileReader> ReaderBuilder<R> {
             ..self
         }
     }
+
+    /// Sets the timezone representation for Avro timestamp fields.
+    ///
+    /// The default is `Tz::OffsetZero`, meaning the "+00:00" time zone ID.
+    pub fn with_tz(mut self, tz: Tz) -> Self {
+        self.tz = tz;
+        self
+    }
 }

-impl<R: AsyncFileReader> ReaderBuilder<R> {
-    async fn read_header(&mut self) -> Result<(Header, u64), AvroError> {
-        let mut decoder = HeaderDecoder::default();
-        let mut position = 0;
-        loop {
-            let range_to_fetch = position
-                ..(position + self.header_size_hint.unwrap_or(DEFAULT_HEADER_SIZE_HINT))
-                    .min(self.file_size);
+/// Reads the Avro file header (magic, metadata, sync marker) asynchronously from `reader`.
+///
+/// On success, returns the parsed [`HeaderInfo`] containing the header and its length in bytes.
+pub async fn read_header_info<R>(
+    reader: &mut R,
+    file_size: u64,
+    header_size_hint: Option<u64>,
+) -> Result<HeaderInfo, AvroError>
+where
+    R: AsyncFileReader,
+{
+    read_header(reader, file_size, header_size_hint)
+        .await
+        .map(|(header, header_len)| HeaderInfo::new(header, header_len))
+}

-            // Maybe EOF after the header, no actual data
-            if range_to_fetch.is_empty() {
-                break;
-            }
+async fn read_header<R>(
+    reader: &mut R,
+    file_size: u64,
+    header_size_hint: Option<u64>,
+) -> Result<(Header, u64), AvroError>
+where
+    R: AsyncFileReader,
+{
+    let mut decoder = HeaderDecoder::default();
+    let mut position = 0;
+    loop {
+        let range_to_fetch = position
+            ..(position + header_size_hint.unwrap_or(DEFAULT_HEADER_SIZE_HINT)).min(file_size);

-            let current_data = self
-                .reader
-                .get_bytes(range_to_fetch.clone())
-                .await
-                .map_err(|err| {
-                    AvroError::General(format!(
-                        "Error fetching Avro header from file reader: {err}"
-                    ))
-                })?;
-            if current_data.is_empty() {
-                return Err(AvroError::EOF(
-                    "Unexpected EOF while fetching header data".into(),
-                ));
-            }
+        // Maybe EOF after the header, no actual data
+        if range_to_fetch.is_empty() {
+            break;
+        }

-            let read = current_data.len();
-            let decoded = decoder.decode(&current_data)?;
-            if decoded != read {
-                position += decoded as u64;
-                break;
-            }
-            position += read as u64;
+        let current_data = reader
+            .get_bytes(range_to_fetch.clone())
+            .await
+            .map_err(|err| {
+                AvroError::General(format!(
+                    "Error fetching Avro header from file reader: {err}"
+                ))
+            })?;
+        if current_data.is_empty() {
+            return Err(AvroError::EOF(
+                "Unexpected EOF while fetching header data".into(),
+            ));
        }
-        decoder
-            .flush()
-            .map(|header| (header, position))
-            .ok_or_else(|| AvroError::EOF("Unexpected EOF while reading Avro header".into()))
+        let read = current_data.len();
+        let decoded = decoder.decode(&current_data)?;
+        if decoded != read {
+            position += decoded as u64;
+            break;
+        }
+        position += read as u64;
     }
+    decoder
+        .flush()
+        .map(|header| (header, position))
+        .ok_or_else(|| AvroError::EOF("Unexpected EOF while reading Avro header".into()))
+}
+
+impl<R: AsyncFileReader> ReaderBuilder<R> {
     /// Build the asynchronous Avro reader with the provided parameters.
     /// This reads the header first to initialize the reader state.
     pub async fn try_build(mut self) -> Result<AsyncAvroFileReader<R>, AvroError> {
@@ -162,18 +193,24 @@

         // Start by reading the header from the beginning of the avro file
         // take the writer schema from the header
-        let (header, header_len) = self.read_header().await?;
-        let writer_schema = {
-            let raw = header.get(SCHEMA_METADATA_KEY).ok_or_else(|| {
-                AvroError::ParseError("No Avro schema present in file header".to_string())
-            })?;
-            let json_string = std::str::from_utf8(raw)
-                .map_err(|e| {
-                    AvroError::ParseError(format!("Invalid UTF-8 in Avro schema header: {e}"))
-                })?
-                .to_string();
-            AvroSchema::new(json_string)
-        };
+        let header_info =
+            read_header_info(&mut self.reader, self.file_size, self.header_size_hint).await?;
+
+        self.build_with_header(header_info)
+    }
+
+    /// Build the asynchronous Avro reader with the provided header.
+    ///
+    /// This allows initializing the reader with pre-parsed header information.
+    /// This method is not async because it does not need to perform any I/O.
+    ///
+    /// Note: Any `header_size_hint` set via [`Self::with_header_size_hint`] is not used
+    /// when building with a pre-parsed header, since no header fetching occurs.
+    pub fn build_with_header(
+        self,
+        header_info: HeaderInfo,
+    ) -> Result<AsyncAvroFileReader<R>, AvroError> {
+        let writer_schema = header_info.writer_schema()?;

         // If a projection exists, project the reader schema; if no reader schema is
         // provided, parse it from the header (the raw writer schema) and project that
@@ -208,6 +245,7 @@
             builder
                 .with_utf8view(self.utf8_view)
                 .with_strict_mode(self.strict_mode)
+                .with_tz(self.tz)
                 .build()
         }?;

@@ -219,6 +257,7 @@
             IndexMap::new(),
             FingerprintAlgorithm::Rabin,
         );
+        let header_len = header_info.header_len();
         let range = match self.range {
             Some(r) => {
                 // If this PartitionedFile's range starts at 0, we need to skip the header bytes.
@@ -241,8 +280,9 @@
                 reader: self.reader,
             }
         };
-        let codec = header.compression()?;
-        let sync_marker = header.sync();
+
+        let codec = header_info.compression()?;
+        let sync_marker = header_info.sync();

         Ok(AsyncAvroFileReader::new(
             range,
diff --git a/arrow-avro/src/reader/async_reader/mod.rs b/arrow-avro/src/reader/async_reader/mod.rs
index 53229f8576eb..c034411edb03 100644
--- a/arrow-avro/src/reader/async_reader/mod.rs
+++ b/arrow-avro/src/reader/async_reader/mod.rs
@@ -15,11 +15,16 @@
 // specific language governing permissions and limitations
 // under the License.

+//! Asynchronous implementation of the Avro file reader.
+//!
+//! This module provides [`AsyncAvroFileReader`], which supports reading and decoding
+//! the Avro OCF format from any source that implements [`AsyncFileReader`].
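+//!
+//! A minimal usage sketch (the reader, file size, and batch size of 1024 mirror the
+//! tests below; `try_collect` comes from `futures::TryStreamExt`, error handling elided):
+//!
+//! ```ignore
+//! let reader = AsyncAvroFileReader::builder(file_reader, file_size, 1024)
+//!     .try_build()
+//!     .await?;
+//! let batches: Vec<RecordBatch> = reader.try_collect().await?;
+//! ```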
+ use crate::compression::CompressionCodec; use crate::reader::Decoder; use crate::reader::block::{BlockDecoder, BlockDecoderState}; use arrow_array::RecordBatch; -use arrow_schema::ArrowError; +use arrow_schema::{ArrowError, SchemaRef}; use bytes::Bytes; use futures::future::BoxFuture; use futures::{FutureExt, Stream}; @@ -32,7 +37,7 @@ mod async_file_reader; mod builder; pub use async_file_reader::AsyncFileReader; -pub use builder::ReaderBuilder; +pub use builder::{ReaderBuilder, read_header_info}; #[cfg(feature = "object_store")] mod store; @@ -173,6 +178,13 @@ impl AsyncAvroFileReader { } } + /// Returns the Arrow schema for batches produced by this reader. + /// + /// The schema is determined by the writer schema in the file and the reader schema provided to the builder. + pub fn schema(&self) -> SchemaRef { + self.decoder.schema() + } + /// Calculate the byte range needed to complete the current block. /// Only valid when block_decoder is in Data or Sync state. /// Returns the range to fetch, or an error if EOF would be reached. @@ -534,7 +546,10 @@ impl Stream for AsyncAvroFileReader { #[cfg(all(test, feature = "object_store"))] mod tests { use super::*; - use crate::schema::{AvroSchema, SCHEMA_METADATA_KEY}; + use crate::codec::Tz; + use crate::schema::{ + AVRO_NAME_METADATA_KEY, AVRO_NAMESPACE_METADATA_KEY, AvroSchema, SCHEMA_METADATA_KEY, + }; use arrow_array::cast::AsArray; use arrow_array::types::{Int32Type, Int64Type}; use arrow_array::*; @@ -553,6 +568,10 @@ mod tests { } fn get_alltypes_schema() -> SchemaRef { + get_alltypes_schema_with_tz("+00:00") + } + + fn get_alltypes_schema_with_tz(tz_id: &str) -> SchemaRef { let schema = Schema::new(vec![ Field::new("id", DataType::Int32, true), Field::new("bool_col", DataType::Boolean, true), @@ -566,7 +585,7 @@ mod tests { Field::new("string_col", DataType::Binary, true), Field::new( "timestamp_col", - DataType::Timestamp(TimeUnit::Microsecond, Some("+00:00".into())), + DataType::Timestamp(TimeUnit::Microsecond, Some(tz_id.into())), true, ), ]) @@ -758,39 +777,63 @@ mod tests { vec![Field::new("f1_3_1", DataType::Float64, false)].into(), ), false, - ), + ) + .with_metadata(HashMap::from([ + (AVRO_NAMESPACE_METADATA_KEY.to_owned(), "ns3".to_owned()), + (AVRO_NAME_METADATA_KEY.to_owned(), "record3".to_owned()), + ])), ] .into(), ), false, - ), + ) + .with_metadata(HashMap::from([ + (AVRO_NAMESPACE_METADATA_KEY.to_owned(), "ns2".to_owned()), + (AVRO_NAME_METADATA_KEY.to_owned(), "record2".to_owned()), + ])), Field::new( "f2", - DataType::List(Arc::new(Field::new( - "item", - DataType::Struct( - vec![ - Field::new("f2_1", DataType::Boolean, false), - Field::new("f2_2", DataType::Float32, false), - ] - .into(), - ), - false, - ))), + DataType::List(Arc::new( + Field::new( + "item", + DataType::Struct( + vec![ + Field::new("f2_1", DataType::Boolean, false), + Field::new("f2_2", DataType::Float32, false), + ] + .into(), + ), + false, + ) + .with_metadata(HashMap::from([ + (AVRO_NAMESPACE_METADATA_KEY.to_owned(), "ns4".to_owned()), + (AVRO_NAME_METADATA_KEY.to_owned(), "record4".to_owned()), + ])), + )), false, ), Field::new( "f3", DataType::Struct(vec![Field::new("f3_1", DataType::Utf8, false)].into()), true, - ), + ) + .with_metadata(HashMap::from([ + (AVRO_NAMESPACE_METADATA_KEY.to_owned(), "ns5".to_owned()), + (AVRO_NAME_METADATA_KEY.to_owned(), "record5".to_owned()), + ])), Field::new( "f4", - DataType::List(Arc::new(Field::new( - "item", - DataType::Struct(vec![Field::new("f4_1", DataType::Int64, false)].into()), - true, - ))), + 
DataType::List(Arc::new( + Field::new( + "item", + DataType::Struct(vec![Field::new("f4_1", DataType::Int64, false)].into()), + true, + ) + .with_metadata(HashMap::from([ + (AVRO_NAMESPACE_METADATA_KEY.to_owned(), "ns6".to_owned()), + (AVRO_NAME_METADATA_KEY.to_owned(), "record6".to_owned()), + ])), + )), false, ), ]) @@ -1248,6 +1291,44 @@ mod tests { assert_eq!(batch.num_rows(), 8); } + #[tokio::test] + async fn test_builder_with_header_info() { + let file = arrow_test_data("avro/alltypes_plain.avro"); + let store = Arc::new(LocalFileSystem::new()); + let location = Path::from_filesystem_path(&file).unwrap(); + + let file_size = store.head(&location).await.unwrap().size; + + let mut file_reader = AvroObjectReader::new(store, location); + + let header_info = read_header_info(&mut file_reader, file_size, None) + .await + .unwrap(); + + assert_eq!(header_info.header_len(), 675); + + let writer_schema = header_info.writer_schema().unwrap(); + let expected_avro_json: serde_json::Value = serde_json::from_str( + get_alltypes_schema() + .metadata() + .get(SCHEMA_METADATA_KEY) + .unwrap(), + ) + .unwrap(); + let actual_avro_json: serde_json::Value = + serde_json::from_str(&writer_schema.json_string).unwrap(); + assert_eq!(actual_avro_json, expected_avro_json); + + let reader = AsyncAvroFileReader::builder(file_reader, file_size, 1024) + .build_with_header(header_info) + .unwrap(); + + let batches: Vec = reader.try_collect().await.unwrap(); + + let batch = &batches[0]; + assert_eq!(batch.num_rows(), 8) + } + #[tokio::test] async fn test_roundtrip_write_then_async_read() { use crate::writer::AvroWriter; @@ -1538,6 +1619,92 @@ mod tests { assert!(err.to_string().contains("Duplicate projection index")); } + #[tokio::test] + async fn test_arrow_schema_from_reader_no_reader_schema() { + let file = arrow_test_data("avro/alltypes_plain.avro"); + let store: Arc = Arc::new(LocalFileSystem::new()); + let location = Path::from_filesystem_path(&file).unwrap(); + let file_size = store.head(&location).await.unwrap().size; + + let file_reader = AvroObjectReader::new(store, location); + let expected_schema = get_alltypes_schema() + .as_ref() + .clone() + .with_metadata(Default::default()); + + // Build reader without providing reader schema - should use writer schema from file + let reader = AsyncAvroFileReader::builder(file_reader, file_size, 1024) + .try_build() + .await + .unwrap(); + + assert_eq!(reader.schema().as_ref(), &expected_schema); + + let batches: Vec = reader.try_collect().await.unwrap(); + let batch = &batches[0]; + + assert_eq!(batch.schema().as_ref(), &expected_schema); + } + + #[tokio::test] + async fn test_arrow_schema_from_reader_with_reader_schema() { + let file = arrow_test_data("avro/alltypes_plain.avro"); + let store: Arc = Arc::new(LocalFileSystem::new()); + let location = Path::from_filesystem_path(&file).unwrap(); + let file_size = store.head(&location).await.unwrap().size; + + let file_reader = AvroObjectReader::new(store, location); + let schema = get_alltypes_schema() + .project(&[0, 1, 7]) + .unwrap() + .with_metadata(Default::default()); + let reader_schema = AvroSchema::try_from(&schema).unwrap(); + let expected_schema = schema.clone(); + + // Build reader with provided reader schema - must apply the projection + let reader = AsyncAvroFileReader::builder(file_reader, file_size, 1024) + .with_reader_schema(reader_schema) + .try_build() + .await + .unwrap(); + + assert_eq!(reader.schema().as_ref(), &expected_schema); + + let batches: Vec = reader.try_collect().await.unwrap(); + let 
batch = &batches[0]; + + assert_eq!(batch.schema().as_ref(), &expected_schema); + } + + #[tokio::test] + async fn test_arrow_schema_from_reader_nested_records() { + let file = arrow_test_data("avro/nested_records.avro"); + let store: Arc = Arc::new(LocalFileSystem::new()); + let location = Path::from_filesystem_path(&file).unwrap(); + let file_size = store.head(&location).await.unwrap().size; + + let file_reader = AvroObjectReader::new(store, location); + + // The schema produced by the reader should match the expected schema, + // attaching Avro type name metadata to fields of record and list types. + let expected_schema = get_nested_records_schema() + .as_ref() + .clone() + .with_metadata(Default::default()); + + let reader = AsyncAvroFileReader::builder(file_reader, file_size, 1024) + .try_build() + .await + .unwrap(); + + assert_eq!(reader.schema().as_ref(), &expected_schema); + + let batches: Vec = reader.try_collect().await.unwrap(); + let batch = &batches[0]; + + assert_eq!(batch.schema().as_ref(), &expected_schema); + } + #[tokio::test] async fn test_with_header_size_hint_small() { // Use a very small header size hint to force multiple fetches @@ -1592,6 +1759,42 @@ mod tests { assert_eq!(batch.num_columns(), 11); } + #[tokio::test] + async fn test_with_tz_utc() { + let file = arrow_test_data("avro/alltypes_plain.avro"); + let store: Arc = Arc::new(LocalFileSystem::new()); + let location = Path::from_filesystem_path(&file).unwrap(); + let file_size = store.head(&location).await.unwrap().size; + + let file_reader = AvroObjectReader::new(store, location); + let schema = get_alltypes_schema_with_tz("UTC"); + let reader_schema = AvroSchema::try_from(schema.as_ref()).unwrap(); + + // Specify the time zone ID of "UTC" for timestamp fields with time zone. + let reader = AsyncAvroFileReader::builder(file_reader, file_size, 1024) + .with_reader_schema(reader_schema) + .with_tz(Tz::Utc) + .try_build() + .await + .unwrap(); + + let batches: Vec = reader.try_collect().await.unwrap(); + let batch = &batches[0]; + + assert_eq!(batch.num_columns(), 11); + + let schema = batch.schema(); + let ts_field = schema.field_with_name("timestamp_col").unwrap(); + assert!( + matches!( + ts_field.data_type(), + DataType::Timestamp(TimeUnit::Microsecond, Some(tz)) if tz.as_ref() == "UTC" + ), + "expected Timestamp(Microsecond, Some(\"UTC\")), got {:?}", + ts_field.data_type() + ); + } + #[tokio::test] async fn test_with_utf8_view_enabled() { // Test that utf8_view produces StringViewArray instead of StringArray diff --git a/arrow-avro/src/reader/header.rs b/arrow-avro/src/reader/header.rs index b5efd8bcdb65..c5593ba0ad70 100644 --- a/arrow-avro/src/reader/header.rs +++ b/arrow-avro/src/reader/header.rs @@ -20,12 +20,17 @@ use crate::compression::{CODEC_METADATA_KEY, CompressionCodec}; use crate::errors::AvroError; use crate::reader::vlq::VLQDecoder; -use crate::schema::{SCHEMA_METADATA_KEY, Schema}; +use crate::schema::{AvroSchema, SCHEMA_METADATA_KEY, Schema}; use std::io::BufRead; +use std::str; +use std::sync::Arc; /// Read the Avro file header (magic, metadata, sync marker) from `reader`. -pub(crate) fn read_header(mut reader: R) -> Result { +/// +/// On success, returns the parsed [`Header`] and the number of bytes read from `reader`. 
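+///
+/// Callers that only need the header itself can discard the length, e.g.
+/// `let (header, _) = read_header(reader)?;` (as the updated call sites below do).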
+pub(crate) fn read_header<R: BufRead>(mut reader: R) -> Result<(Header, u64), AvroError> {
     let mut decoder = HeaderDecoder::default();
+    let mut position = 0;
     loop {
         let buf = reader.fill_buf()?;
         if buf.is_empty() {
@@ -34,12 +39,14 @@ pub(crate) fn read_header<R: BufRead>(mut reader: R) -> Result<Header, AvroError> {
         let read = buf.len();
         let decoded = decoder.decode(buf)?;
         reader.consume(decoded);
+        position += decoded as u64;
         if decoded != read {
             break;
         }
     }
     decoder
         .flush()
+        .map(|header| (header, position))
         .ok_or_else(|| AvroError::EOF("Unexpected EOF while reading Avro header".into()))
 }

+/// Parsed Avro file header together with its length in bytes.
+#[derive(Clone)]
+pub struct HeaderInfo(Arc<HeaderInfoInner>);
+
+struct HeaderInfoInner {
+    header: Header,
+    header_len: u64,
+}
+
+/// Reads the Avro file header (magic, metadata, sync marker) from `reader`.
+///
+/// On success, returns the parsed [`HeaderInfo`] containing the header and its length in bytes.
+pub fn read_header_info<R: BufRead>(reader: R) -> Result<HeaderInfo, AvroError> {
+    let (header, header_len) = read_header(reader)?;
+    Ok(HeaderInfo::new(header, header_len))
+}
+
+impl HeaderInfo {
+    pub(crate) fn new(header: Header, header_len: u64) -> Self {
+        Self(Arc::new(HeaderInfoInner { header, header_len }))
+    }
+
+    /// Returns the writer schema for this file.
+    pub fn writer_schema(&self) -> Result<AvroSchema, AvroError> {
+        let raw = self.0.header.get(SCHEMA_METADATA_KEY).ok_or_else(|| {
+            AvroError::ParseError("No Avro schema present in file header".to_string())
+        })?;
+        let json_string = str::from_utf8(raw)
+            .map_err(|e| {
+                AvroError::ParseError(format!("Invalid UTF-8 in Avro schema header: {e}"))
+            })?
+            .to_string();
+        Ok(AvroSchema::new(json_string))
+    }
+
+    /// Returns the [`CompressionCodec`] if any
+    pub fn compression(&self) -> Result<Option<CompressionCodec>, AvroError> {
+        self.0.header.compression()
+    }
+
+    /// Returns the length of the header in bytes.
+    pub fn header_len(&self) -> u64 {
+        self.0.header_len
+    }
+
+    /// Returns the sync token for this file.
+    pub fn sync(&self) -> [u8; 16] {
+        self.0.header.sync()
+    }
+}
+
 /// A decoder for [`Header`]
 ///
 /// The avro file format does not encode the length of the header, and so it
@@ -315,7 +376,7 @@ mod test {
     fn decode_file(file: &str) -> Header {
         let file = File::open(file).unwrap();
-        read_header(BufReader::with_capacity(1000, file)).unwrap()
+        read_header(BufReader::with_capacity(1000, file)).unwrap().0
     }

     #[test]
diff --git a/arrow-avro/src/reader/mod.rs b/arrow-avro/src/reader/mod.rs
index aa01f272bfeb..070204f2bcfb 100644
--- a/arrow-avro/src/reader/mod.rs
+++ b/arrow-avro/src/reader/mod.rs
@@ -478,7 +478,7 @@
 //! descriptive error. Populate the store up front to avoid this.
 //!
 //! ---
-use crate::codec::AvroFieldBuilder;
+use crate::codec::{AvroFieldBuilder, Tz};
 use crate::errors::AvroError;
 use crate::reader::header::read_header;
 use crate::schema::{
@@ -500,7 +500,9 @@
 mod record;
 mod vlq;

 #[cfg(feature = "async")]
-mod async_reader;
+pub mod async_reader;
+
+pub use header::{HeaderInfo, read_header_info};

 #[cfg(feature = "object_store")]
 pub use async_reader::AvroObjectReader;
@@ -967,6 +969,7 @@ pub struct ReaderBuilder {
     batch_size: usize,
     strict_mode: bool,
     utf8_view: bool,
+    tz: Tz,
     reader_schema: Option<AvroSchema>,
     projection: Option<Vec<usize>>,
     writer_schema_store: Option<SchemaStore>,
@@ -979,6 +982,7 @@ impl Default for ReaderBuilder {
             batch_size: 1024,
             strict_mode: false,
             utf8_view: false,
+            tz: Default::default(),
             reader_schema: None,
             projection: None,
             writer_schema_store: None,
@@ -993,6 +997,7 @@ impl ReaderBuilder {
     /// * `batch_size = 1024`
     /// * `strict_mode = false`
     /// * `utf8_view = false`
+    /// * `tz = Tz::OffsetZero`
     /// * `reader_schema = None`
     /// * `projection = None`
     /// * `writer_schema_store = None`
@@ -1013,6 +1018,7 @@
         let root = builder
             .with_utf8view(self.utf8_view)
             .with_strict_mode(self.strict_mode)
+            .with_tz(self.tz)
             .build()?;
         RecordDecoder::try_new_with_options(root.data_type())
     }
@@ -1173,6 +1179,14 @@ impl ReaderBuilder {
         self
     }

+    /// Sets the timezone representation for Avro timestamp fields.
+    ///
+    /// The default is `Tz::OffsetZero`, meaning the "+00:00" time zone ID.
+    pub fn with_tz(mut self, tz: Tz) -> Self {
+        self.tz = tz;
+        self
+    }
+
     /// Sets the **reader schema** used during decoding.
     ///
     /// If not provided, the writer schema from the OCF header (for `Reader`) or the
@@ -1273,7 +1287,7 @@
     /// the discovered writer (and optional reader) schema, and prepares to iterate blocks,
     /// decompressing if necessary.
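     ///
     /// A minimal sketch (the file name here is hypothetical):
     /// `let reader = ReaderBuilder::new().build(BufReader::new(File::open("data.avro")?))?;`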
pub fn build(self, mut reader: R) -> Result, ArrowError> { - let header = read_header(&mut reader)?; + let (header, _) = read_header(&mut reader)?; let decoder = self.make_decoder(Some(&header), self.reader_schema.as_ref())?; Ok(Reader { reader, @@ -1400,7 +1414,7 @@ impl RecordBatchReader for Reader { #[cfg(test)] mod test { - use crate::codec::AvroFieldBuilder; + use crate::codec::{AvroFieldBuilder, Tz}; use crate::reader::header::HeaderDecoder; use crate::reader::record::RecordDecoder; use crate::reader::{Decoder, Reader, ReaderBuilder}; @@ -1632,7 +1646,7 @@ mod test { fn load_writer_schema_json(path: &str) -> Value { let file = File::open(path).unwrap(); - let header = super::read_header(BufReader::new(file)).unwrap(); + let (header, _) = super::read_header(BufReader::new(file)).unwrap(); let schema = header.schema().unwrap().unwrap(); serde_json::to_value(&schema).unwrap() } @@ -3129,6 +3143,43 @@ mod test { ); } + #[test] + fn test_timestamp_with_utc_tz() { + let path = arrow_test_data("avro/alltypes_plain.avro"); + let reader_schema = + make_reader_schema_with_selected_fields_in_order(&path, &["timestamp_col"]); + let file = File::open(path).unwrap(); + let reader = ReaderBuilder::new() + .with_batch_size(1024) + .with_utf8_view(false) + .with_reader_schema(reader_schema) + .with_tz(Tz::Utc) + .build(BufReader::new(file)) + .unwrap(); + let schema = reader.schema(); + let batches = reader.collect::, _>>().unwrap(); + let batch = arrow::compute::concat_batches(&schema, &batches).unwrap(); + let expected = RecordBatch::try_from_iter_with_nullable([( + "timestamp_col", + Arc::new( + TimestampMicrosecondArray::from_iter_values([ + 1235865600000000, // 2009-03-01T00:00:00.000 + 1235865660000000, // 2009-03-01T00:01:00.000 + 1238544000000000, // 2009-04-01T00:00:00.000 + 1238544060000000, // 2009-04-01T00:01:00.000 + 1233446400000000, // 2009-02-01T00:00:00.000 + 1233446460000000, // 2009-02-01T00:01:00.000 + 1230768000000000, // 2009-01-01T00:00:00.000 + 1230768060000000, // 2009-01-01T00:01:00.000 + ]) + .with_timezone("UTC"), + ) as _, + true, + )]) + .unwrap(); + assert_eq!(batch, expected); + } + #[test] // TODO: avoid requiring snappy for this file #[cfg(feature = "snappy")] @@ -6866,6 +6917,264 @@ mod test { assert_eq!(int_values.value(1), 2); } + #[test] + fn test_nested_record_field_addition() { + let file = arrow_test_data("avro/nested_records.avro"); + + // Adds fields to the writer schema: + // * "ns2.record2" / "f1_4" + // - nullable + // - added last + // - the containing "f1" field is made nullable in the reader + // * "ns4.record4" / "f2_3" + // - non-nullable with an integer default value + // - resolution of a record nested in an array + // * "ns5.record5" / "f3_0" + // - non-nullable with a string default value + // - prepended before existing fields in the schema order + let reader_schema = AvroSchema::new( + r#" + { + "type": "record", + "name": "record1", + "namespace": "ns1", + "fields": [ + { + "name": "f1", + "type": [ + "null", + { + "type": "record", + "name": "record2", + "namespace": "ns2", + "fields": [ + { + "name": "f1_1", + "type": "string" + }, + { + "name": "f1_2", + "type": "int" + }, + { + "name": "f1_3", + "type": { + "type": "record", + "name": "record3", + "namespace": "ns3", + "fields": [ + { + "name": "f1_3_1", + "type": "double" + } + ] + } + }, + { + "name": "f1_4", + "type": ["null", "int"], + "default": null + } + ] + } + ] + }, + { + "name": "f2", + "type": { + "type": "array", + "items": { + "type": "record", + "name": "record4", + 
"namespace": "ns4", + "fields": [ + { + "name": "f2_1", + "type": "boolean" + }, + { + "name": "f2_2", + "type": "float" + }, + { + "name": "f2_3", + "type": ["null", "int"], + "default": 42 + } + ] + } + } + }, + { + "name": "f3", + "type": [ + "null", + { + "type": "record", + "name": "record5", + "namespace": "ns5", + "fields": [ + { + "name": "f3_0", + "type": "string", + "default": "lorem ipsum" + }, + { + "name": "f3_1", + "type": "string" + } + ] + } + ], + "default": null + }, + { + "name": "f4", + "type": { + "type": "array", + "items": [ + "null", + { + "type": "record", + "name": "record6", + "namespace": "ns6", + "fields": [ + { + "name": "f4_1", + "type": "long" + } + ] + } + ] + } + } + ] + } + "# + .to_string(), + ); + + let file = File::open(&file).unwrap(); + let mut reader = ReaderBuilder::new() + .with_reader_schema(reader_schema) + .build(BufReader::new(file)) + .expect("reader with evolved reader schema should be built successfully"); + + let batch = reader + .next() + .expect("should have at least one batch") + .expect("reading should succeed"); + + assert!(batch.num_rows() > 0); + + let schema = batch.schema(); + + let f1_field = schema.field_with_name("f1").expect("f1 field should exist"); + if let DataType::Struct(f1_fields) = f1_field.data_type() { + let (_, f1_4) = f1_fields + .find("f1_4") + .expect("f1_4 field should be present in record2"); + assert!(f1_4.is_nullable(), "f1_4 should be nullable"); + assert_eq!(f1_4.data_type(), &DataType::Int32, "f1_4 should be Int32"); + assert_eq!( + f1_4.metadata().get("avro.field.default"), + Some(&"null".to_string()), + "f1_4 should have null default value in metadata" + ); + } else { + panic!("f1 should be a struct"); + } + + let f2_field = schema.field_with_name("f2").expect("f2 field should exist"); + if let DataType::List(f2_items_field) = f2_field.data_type() { + if let DataType::Struct(f2_items_fields) = f2_items_field.data_type() { + let (_, f2_3) = f2_items_fields + .find("f2_3") + .expect("f2_3 field should be present in record4"); + assert!(f2_3.is_nullable(), "f2_3 should be nullable"); + assert_eq!(f2_3.data_type(), &DataType::Int32, "f2_3 should be Int32"); + assert_eq!( + f2_3.metadata().get("avro.field.default"), + Some(&"42".to_string()), + "f2_3 should have 42 default value in metadata" + ); + } else { + panic!("f2 array items should be a struct"); + } + } else { + panic!("f2 should be a list"); + } + + let f3_field = schema.field_with_name("f3").expect("f3 field should exist"); + assert!(f3_field.is_nullable(), "f3 should be nullable"); + if let DataType::Struct(f3_fields) = f3_field.data_type() { + let (_, f3_0) = f3_fields + .find("f3_0") + .expect("f3_0 field should be present in record5"); + assert!(!f3_0.is_nullable(), "f3_0 should be non-nullable"); + assert_eq!(f3_0.data_type(), &DataType::Utf8, "f3_0 should be a string"); + assert_eq!( + f3_0.metadata().get("avro.field.default"), + Some(&"\"lorem ipsum\"".to_string()), + "f3_0 should have \"lorem ipsum\" default value in metadata" + ); + } else { + panic!("f3 should be a struct"); + } + + // Verify the actual values in the columns match the expected defaults + let num_rows = batch.num_rows(); + + // Check f1_4 values (should all be null since default is null) + let f1_array = batch + .column_by_name("f1") + .expect("f1 column should exist") + .as_struct(); + let f1_4_array = f1_array + .column_by_name("f1_4") + .expect("f1_4 column should exist in f1 struct") + .as_primitive::(); + + assert_eq!(f1_4_array.null_count(), num_rows); + + let f2_array 
= batch + .column_by_name("f2") + .expect("f2 column should exist") + .as_list::(); + + for i in 0..num_rows { + assert!(!f2_array.is_null(i)); + let f2_value = f2_array.value(i); + let f2_record_array = f2_value.as_struct(); + let f2_3_array = f2_record_array + .column_by_name("f2_3") + .expect("f2_3 column should exist in f2 array items") + .as_primitive::(); + + for j in 0..f2_3_array.len() { + assert!(!f2_3_array.is_null(j)); + assert_eq!(f2_3_array.value(j), 42); + } + } + + let f3_array = batch + .column_by_name("f3") + .expect("f3 column should exist") + .as_struct(); + let f3_0_array = f3_array + .column_by_name("f3_0") + .expect("f3_0 column should exist in f3 struct") + .as_string::(); + + for i in 0..num_rows { + // Only check f3_0 when the parent f3 struct is not null + if !f3_array.is_null(i) { + assert!(!f3_0_array.is_null(i)); + assert_eq!(f3_0_array.value(i), "lorem ipsum"); + } + } + } + fn corrupt_first_block_payload_byte( mut bytes: Vec, field_offset: usize, @@ -8441,6 +8750,33 @@ mod test { ])), false, )); + let person_md = { + let mut m = HashMap::::new(); + m.insert(AVRO_NAME_METADATA_KEY.to_string(), "Person".to_string()); + m.insert( + AVRO_NAMESPACE_METADATA_KEY.to_string(), + "com.example".to_string(), + ); + m + }; + let maybe_auth_md = { + let mut m = HashMap::::new(); + m.insert(AVRO_NAME_METADATA_KEY.to_string(), "MaybeAuth".to_string()); + m.insert( + AVRO_NAMESPACE_METADATA_KEY.to_string(), + "org.apache.arrow.avrotests.v1.types".to_string(), + ); + m + }; + let address_md = { + let mut m = HashMap::::new(); + m.insert(AVRO_NAME_METADATA_KEY.to_string(), "Address".to_string()); + m.insert( + AVRO_NAMESPACE_METADATA_KEY.to_string(), + "org.apache.arrow.avrotests.v1.types".to_string(), + ); + m + }; let rec_a_md = { let mut m = HashMap::::new(); m.insert(AVRO_NAME_METADATA_KEY.to_string(), "RecA".to_string()); @@ -8576,11 +8912,18 @@ mod test { true, ), ]); - let kv_item_field = Arc::new(Field::new( - item_name, - DataType::Struct(kv_fields.clone()), - false, - )); + let kv_md = { + let mut m = HashMap::::new(); + m.insert(AVRO_NAME_METADATA_KEY.to_string(), "KV".to_string()); + m.insert( + AVRO_NAMESPACE_METADATA_KEY.to_string(), + "org.apache.arrow.avrotests.v1.types".to_string(), + ); + m + }; + let kv_item_field = Arc::new( + Field::new(item_name, DataType::Struct(kv_fields.clone()), false).with_metadata(kv_md), + ); let map_int_entries = Arc::new(Field::new( "entries", DataType::Struct(Fields::from(vec![ @@ -8652,14 +8995,17 @@ mod test { #[cfg(not(feature = "small_decimals"))] let dec10_dt = DataType::Decimal128(10, 2); let fields: Vec = vec![ - Arc::new(Field::new( - "person", - DataType::Struct(Fields::from(vec![ - Field::new("name", DataType::Utf8, false), - Field::new("age", DataType::Int32, false), - ])), - false, - )), + Arc::new( + Field::new( + "person", + DataType::Struct(Fields::from(vec![ + Field::new("name", DataType::Utf8, false), + Field::new("age", DataType::Int32, false), + ])), + false, + ) + .with_metadata(person_md), + ), Arc::new(Field::new("old_count", DataType::Int32, false)), Arc::new(Field::new( "union_map_or_array_int", @@ -8691,23 +9037,29 @@ mod test { DataType::Union(uf_union_big.clone(), UnionMode::Dense), false, )), - Arc::new(Field::new( - "maybe_auth", - DataType::Struct(Fields::from(vec![ - Field::new("user", DataType::Utf8, false), - Field::new("token", DataType::Binary, true), // [bytes,null] -> nullable bytes - ])), - false, - )), - Arc::new(Field::new( - "address", - DataType::Struct(Fields::from(vec![ - 
Field::new("street_name", DataType::Utf8, false), - Field::new("zip", DataType::Int32, false), - Field::new("country", DataType::Utf8, false), - ])), - false, - )), + Arc::new( + Field::new( + "maybe_auth", + DataType::Struct(Fields::from(vec![ + Field::new("user", DataType::Utf8, false), + Field::new("token", DataType::Binary, true), // [bytes,null] -> nullable bytes + ])), + false, + ) + .with_metadata(maybe_auth_md), + ), + Arc::new( + Field::new( + "address", + DataType::Struct(Fields::from(vec![ + Field::new("street_name", DataType::Utf8, false), + Field::new("zip", DataType::Int32, false), + Field::new("country", DataType::Utf8, false), + ])), + false, + ) + .with_metadata(address_md), + ), Arc::new(Field::new( "map_union", DataType::Map(map_entries_field.clone(), false), diff --git a/arrow-avro/src/reader/record.rs b/arrow-avro/src/reader/record.rs index 7701eeea725a..97cdeed20fc6 100644 --- a/arrow-avro/src/reader/record.rs +++ b/arrow-avro/src/reader/record.rs @@ -18,8 +18,8 @@ //! Avro Decoder for Arrow types. use crate::codec::{ - AvroDataType, AvroField, AvroLiteral, Codec, Promotion, ResolutionInfo, ResolvedRecord, - ResolvedUnion, + AvroDataType, AvroLiteral, Codec, EnumMapping, Promotion, ResolutionInfo, ResolvedField, + ResolvedRecord, ResolvedUnion, Tz, }; use crate::errors::AvroError; use crate::reader::cursor::AvroCursor; @@ -38,22 +38,14 @@ use arrow_schema::{ }; #[cfg(feature = "avro_custom_types")] use arrow_select::take::{TakeOptions, take}; -use std::cmp::Ordering; -use std::sync::Arc; use strum_macros::AsRefStr; use uuid::Uuid; -const DEFAULT_CAPACITY: usize = 1024; +use std::cmp::Ordering; +use std::mem; +use std::sync::Arc; -/// Runtime plan for decoding reader-side `["null", T]` types. -#[derive(Clone, Copy, Debug)] -enum NullablePlan { - /// Writer actually wrote a union (branch tag present). - ReadTag, - /// Writer wrote a single (non-union) value resolved to the non-null branch - /// of the reader union; do NOT read a branch tag, but apply any promotion. - FromSingle { promotion: Promotion }, -} +const DEFAULT_CAPACITY: usize = 1024; /// Macro to decode a decimal payload for a given width and integer type. macro_rules! decode_decimal { @@ -121,13 +113,22 @@ impl RecordDecoder { // Build Arrow schema fields and per-child decoders let mut arrow_fields = Vec::with_capacity(reader_fields.len()); let mut encodings = Vec::with_capacity(reader_fields.len()); + let mut field_defaults = Vec::with_capacity(reader_fields.len()); for avro_field in reader_fields.iter() { arrow_fields.push(avro_field.field()); encodings.push(Decoder::try_new(avro_field.data_type())?); + + if let Some(ResolutionInfo::DefaultValue(lit)) = + avro_field.data_type().resolution.as_ref() + { + field_defaults.push(Some(lit.clone())); + } else { + field_defaults.push(None); + } } let projector = match data_type.resolution.as_ref() { Some(ResolutionInfo::Record(rec)) => { - Some(ProjectorBuilder::try_new(rec, reader_fields).build()?) + Some(ProjectorBuilder::try_new(rec, &field_defaults).build()?) 
} _ => None, }; @@ -179,12 +180,6 @@ impl RecordDecoder { } } -#[derive(Debug)] -struct EnumResolution { - mapping: Arc<[i32]>, - default_index: i32, -} - #[derive(Debug, AsRefStr)] enum Decoder { Null(usize), @@ -232,9 +227,9 @@ enum Decoder { Date32(Vec), TimeMillis(Vec), TimeMicros(Vec), - TimestampMillis(bool, Vec), - TimestampMicros(bool, Vec), - TimestampNanos(bool, Vec), + TimestampMillis(Option, Vec), + TimestampMicros(Option, Vec), + TimestampNanos(Option, Vec), Int32ToInt64(Vec), Int32ToFloat32(Vec), Int32ToFloat64(Vec), @@ -249,7 +244,12 @@ enum Decoder { /// String data encoded as UTF-8 bytes, but mapped to Arrow's StringViewArray StringView(OffsetBufferBuilder, Vec), Array(FieldRef, OffsetBufferBuilder, Box), - Record(Fields, Vec, Option), + Record( + Fields, + Vec, + Vec>, + Option, + ), Map( FieldRef, OffsetBufferBuilder, @@ -270,7 +270,7 @@ enum Decoder { #[cfg(feature = "avro_custom_types")] RunEndEncoded(u8, usize, Box), Union(UnionDecoder), - Nullable(Nullability, NullBufferBuilder, Box, NullablePlan), + Nullable(NullablePlan, NullBufferBuilder, Box), } impl Decoder { @@ -279,7 +279,7 @@ impl Decoder { if info.writer_is_union && !info.reader_is_union { let mut clone = data_type.clone(); clone.resolution = None; // Build target base decoder without Union resolution - let target = Box::new(Self::try_new_internal(&clone)?); + let target = Self::try_new_internal(&clone)?; let decoder = Self::Union( UnionDecoderBuilder::new() .with_resolved_union(info.clone()) @@ -295,7 +295,7 @@ impl Decoder { fn try_new_internal(data_type: &AvroDataType) -> Result { // Extract just the Promotion (if any) to simplify pattern matching let promotion = match data_type.resolution.as_ref() { - Some(ResolutionInfo::Promotion(p)) => Some(p), + Some(ResolutionInfo::Promotion(p)) => Some(*p), _ => None, }; let decoder = match (data_type.codec(), promotion) { @@ -347,14 +347,14 @@ impl Decoder { (Codec::Date32, _) => Self::Date32(Vec::with_capacity(DEFAULT_CAPACITY)), (Codec::TimeMillis, _) => Self::TimeMillis(Vec::with_capacity(DEFAULT_CAPACITY)), (Codec::TimeMicros, _) => Self::TimeMicros(Vec::with_capacity(DEFAULT_CAPACITY)), - (Codec::TimestampMillis(is_utc), _) => { - Self::TimestampMillis(*is_utc, Vec::with_capacity(DEFAULT_CAPACITY)) + (Codec::TimestampMillis(tz), _) => { + Self::TimestampMillis(*tz, Vec::with_capacity(DEFAULT_CAPACITY)) } - (Codec::TimestampMicros(is_utc), _) => { - Self::TimestampMicros(*is_utc, Vec::with_capacity(DEFAULT_CAPACITY)) + (Codec::TimestampMicros(tz), _) => { + Self::TimestampMicros(*tz, Vec::with_capacity(DEFAULT_CAPACITY)) } - (Codec::TimestampNanos(is_utc), _) => { - Self::TimestampNanos(*is_utc, Vec::with_capacity(DEFAULT_CAPACITY)) + (Codec::TimestampNanos(tz), _) => { + Self::TimestampNanos(*tz, Vec::with_capacity(DEFAULT_CAPACITY)) } #[cfg(feature = "avro_custom_types")] (Codec::DurationNanos, _) => { @@ -466,10 +466,9 @@ impl Decoder { } (Codec::Enum(symbols), _) => { let res = match data_type.resolution.as_ref() { - Some(ResolutionInfo::EnumMapping(mapping)) => Some(EnumResolution { - mapping: mapping.mapping.clone(), - default_index: mapping.default_index, - }), + Some(ResolutionInfo::EnumMapping(mapping)) => { + Some(EnumResolution::new(mapping)) + } _ => None, }; Self::Enum(Vec::with_capacity(DEFAULT_CAPACITY), symbols.clone(), res) @@ -477,18 +476,27 @@ impl Decoder { (Codec::Struct(fields), _) => { let mut arrow_fields = Vec::with_capacity(fields.len()); let mut encodings = Vec::with_capacity(fields.len()); + let mut field_defaults = 
Vec::with_capacity(fields.len()); for avro_field in fields.iter() { let encoding = Self::try_new(avro_field.data_type())?; arrow_fields.push(avro_field.field()); encodings.push(encoding); + + if let Some(ResolutionInfo::DefaultValue(lit)) = + avro_field.data_type().resolution.as_ref() + { + field_defaults.push(Some(lit.clone())); + } else { + field_defaults.push(None); + } } let projector = if let Some(ResolutionInfo::Record(rec)) = data_type.resolution.as_ref() { - Some(ProjectorBuilder::try_new(rec, fields).build()?) + Some(ProjectorBuilder::try_new(rec, &field_defaults).build()?) } else { None }; - Self::Record(arrow_fields.into(), encodings, projector) + Self::Record(arrow_fields.into(), encodings, field_defaults, projector) } (Codec::Map(child), _) => { let val_field = child.field_with_name("value"); @@ -568,20 +576,49 @@ impl Decoder { }; Ok(match data_type.nullability() { Some(nullability) => { - // Default to reading a union branch tag unless the resolution proves otherwise. - let mut plan = NullablePlan::ReadTag; - if let Some(ResolutionInfo::Union(info)) = data_type.resolution.as_ref() { - if !info.writer_is_union && info.reader_is_union { - if let Some(Some((_reader_idx, promo))) = info.writer_to_reader.first() { - plan = NullablePlan::FromSingle { promotion: *promo }; + // Default to reading a union branch tag unless the resolution directs otherwise. + let plan = match &data_type.resolution { + None => NullablePlan::ReadTag { + nullability, + resolution: ResolutionPlan::Promotion(Promotion::Direct), + }, + Some(ResolutionInfo::Promotion(_)) => { + // Promotions should have been incorporated + // into the inner decoder. + NullablePlan::FromSingle { + resolution: ResolutionPlan::Promotion(Promotion::Direct), } } - } + Some(ResolutionInfo::Union(info)) if !info.writer_is_union => { + let Some(Some((_, resolution))) = info.writer_to_reader.first() else { + return Err(AvroError::SchemaError( + "unexpected union resolution info for non-union writer and union reader type".into(), + )); + }; + let resolution = ResolutionPlan::try_new(&decoder, resolution)?; + NullablePlan::FromSingle { resolution } + } + Some(ResolutionInfo::Union(info)) => { + let Some((_, resolution)) = + info.writer_to_reader[nullability.non_null_index()].as_ref() + else { + return Err(AvroError::SchemaError( + "unexpected union resolution info for nullable writer type".into(), + )); + }; + NullablePlan::ReadTag { + nullability, + resolution: ResolutionPlan::try_new(&decoder, resolution)?, + } + } + Some(resolution) => NullablePlan::FromSingle { + resolution: ResolutionPlan::try_new(&decoder, resolution)?, + }, + }; Self::Nullable( - nullability, + plan, NullBufferBuilder::new(DEFAULT_CAPACITY), Box::new(decoder), - plan, ) } None => decoder, @@ -645,7 +682,7 @@ impl Decoder { Self::Array(_, offsets, _) => { offsets.push_length(0); } - Self::Record(_, e, _) => { + Self::Record(_, e, _, _) => { for encoding in e.iter_mut() { encoding.append_null()?; } @@ -670,7 +707,7 @@ impl Decoder { inner.append_null()?; } Self::Union(u) => u.append_null()?, - Self::Nullable(_, null_buffer, inner, _) => { + Self::Nullable(_, null_buffer, inner) => { null_buffer.append(false); inner.append_null()?; } @@ -681,7 +718,7 @@ impl Decoder { /// Append a single default literal into the decoder's buffers fn append_default(&mut self, lit: &AvroLiteral) -> Result<(), AvroError> { match self { - Self::Nullable(_, nb, inner, _) => { + Self::Nullable(_, nb, inner) => { if matches!(lit, AvroLiteral::Null) { nb.append(false); inner.append_null() 
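The next hunk routes record defaults through the new `field_defaults` vector rather than through the projector. The selection order it implements — explicit entry from the default map, then the field's own schema default, then null — is captured by this self-contained sketch, where `Literal` is a stand-in for the crate's `AvroLiteral`:

```rust
use std::collections::HashMap;

/// Stand-in for the crate's `AvroLiteral` default values.
#[derive(Clone, Debug, PartialEq)]
enum Literal {
    Null,
    Int(i32),
}

/// For each reader field, pick the value to append when the writer did not
/// supply one: an explicit entry wins, then the field's schema default,
/// then null. This mirrors the dispatch in `Decoder::append_default`.
fn pick_defaults(
    field_names: &[&str],
    field_defaults: &[Option<Literal>],
    entries: &HashMap<String, Literal>,
) -> Vec<Literal> {
    field_names
        .iter()
        .zip(field_defaults)
        .map(|(name, default)| {
            entries
                .get(*name)
                .cloned()
                .or_else(|| default.clone())
                .unwrap_or(Literal::Null)
        })
        .collect()
}

fn main() {
    let names = ["a", "b", "c"];
    let defaults = [None, Some(Literal::Int(42)), None];
    let mut entries = HashMap::new();
    entries.insert("a".to_string(), Literal::Int(7));
    assert_eq!(
        pick_defaults(&names, &defaults, &entries),
        vec![Literal::Int(7), Literal::Int(42), Literal::Null]
    );
}
```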
@@ -1087,14 +1124,14 @@ impl Decoder { inner.append_default(lit) } Self::Union(u) => u.append_default(lit), - Self::Record(field_meta, decoders, projector) => match lit { + Self::Record(field_meta, decoders, field_defaults, _) => match lit { AvroLiteral::Map(entries) => { for (i, dec) in decoders.iter_mut().enumerate() { let name = field_meta[i].name(); if let Some(sub) = entries.get(name) { dec.append_default(sub)?; - } else if let Some(proj) = projector.as_ref() { - proj.project_default(dec, i)?; + } else if let Some(default_literal) = field_defaults[i].as_ref() { + dec.append_default(default_literal)?; } else { dec.append_null()?; } @@ -1103,8 +1140,8 @@ impl Decoder { } AvroLiteral::Null => { for (i, dec) in decoders.iter_mut().enumerate() { - if let Some(proj) = projector.as_ref() { - proj.project_default(dec, i)?; + if let Some(default_literal) = field_defaults[i].as_ref() { + dec.append_default(default_literal)?; } else { dec.append_null()?; } @@ -1246,12 +1283,12 @@ impl Decoder { let total_items = read_blocks(buf, |cursor| encoding.decode(cursor))?; off.push_length(total_items); } - Self::Record(_, encodings, None) => { + Self::Record(_, encodings, _, None) => { for encoding in encodings { encoding.decode(buf)?; } } - Self::Record(_, encodings, Some(proj)) => { + Self::Record(_, encodings, _, Some(proj)) => { proj.project_record(buf, encodings)?; } Self::Map(_, koff, moff, kdata, valdec) => { @@ -1286,18 +1323,8 @@ impl Decoder { } Self::Enum(indices, _, Some(res)) => { let raw = buf.get_int()?; - let resolved = usize::try_from(raw) - .ok() - .and_then(|idx| res.mapping.get(idx).copied()) - .filter(|&idx| idx >= 0) - .unwrap_or(res.default_index); - if resolved >= 0 { - indices.push(resolved); - } else { - return Err(AvroError::ParseError(format!( - "Enum symbol index {raw} not resolvable and no default provided", - ))); - } + let resolved = res.resolve(raw)?; + indices.push(resolved); } Self::Duration(builder) => { let b = buf.get_fixed(12)?; @@ -1313,26 +1340,31 @@ impl Decoder { inner.decode(buf)?; } Self::Union(u) => u.decode(buf)?, - Self::Nullable(order, nb, encoding, plan) => match *plan { - NullablePlan::FromSingle { promotion } => { - encoding.decode_with_promotion(buf, promotion)?; - nb.append(true); - } - NullablePlan::ReadTag => { - let branch = buf.read_vlq()?; - let is_not_null = match *order { - Nullability::NullFirst => branch != 0, - Nullability::NullSecond => branch == 0, - }; - if is_not_null { - // It is important to decode before appending to null buffer in case of decode error - encoding.decode(buf)?; - } else { - encoding.append_null()?; + Self::Nullable(plan, nb, encoding) => { + match plan { + NullablePlan::FromSingle { resolution } => { + encoding.decode_with_resolution(buf, resolution)?; + nb.append(true); + } + NullablePlan::ReadTag { + nullability, + resolution, + } => { + let branch = buf.read_vlq()?; + let is_not_null = match *nullability { + Nullability::NullFirst => branch != 0, + Nullability::NullSecond => branch == 0, + }; + if is_not_null { + // It is important to decode before appending to null buffer in case of decode error + encoding.decode_with_resolution(buf, resolution)?; + } else { + encoding.append_null()?; + } + nb.append(is_not_null); } - nb.append(is_not_null); } - }, + } } Ok(()) } @@ -1401,10 +1433,49 @@ impl Decoder { } } + fn decode_with_resolution<'d>( + &'d mut self, + buf: &mut AvroCursor<'_>, + resolution: &'d ResolutionPlan, + ) -> Result<(), AvroError> { + #[cfg(feature = "avro_custom_types")] + if let Self::RunEndEncoded(_, 
len, inner) = self { + *len += 1; + return inner.decode_with_resolution(buf, resolution); + } + + match resolution { + ResolutionPlan::Promotion(promotion) => { + let promotion = *promotion; + self.decode_with_promotion(buf, promotion) + } + ResolutionPlan::DefaultValue(lit) => self.append_default(lit), + ResolutionPlan::EnumMapping(res) => { + let Self::Enum(indices, _, _) = self else { + return Err(AvroError::SchemaError( + "enum mapping resolution provided for non-enum decoder".into(), + )); + }; + let raw = buf.get_int()?; + let resolved = res.resolve(raw)?; + indices.push(resolved); + Ok(()) + } + ResolutionPlan::Record(proj) => { + let Self::Record(_, encodings, _, _) = self else { + return Err(AvroError::SchemaError( + "record projection provided for non-record decoder".into(), + )); + }; + proj.project_record(buf, encodings) + } + } + } + /// Flush decoded records to an [`ArrayRef`] fn flush(&mut self, nulls: Option) -> Result { Ok(match self { - Self::Nullable(_, n, e, _) => e.flush(n.finish())?, + Self::Nullable(_, n, e) => e.flush(n.finish())?, Self::Null(size) => Arc::new(NullArray::new(std::mem::replace(size, 0))), Self::Boolean(b) => Arc::new(BooleanArray::new(b.finish(), nulls)), Self::Int32(values) => Arc::new(flush_primitive::(values, nulls)), @@ -1416,17 +1487,17 @@ impl Decoder { Self::TimeMicros(values) => { Arc::new(flush_primitive::(values, nulls)) } - Self::TimestampMillis(is_utc, values) => Arc::new( + Self::TimestampMillis(tz, values) => Arc::new( flush_primitive::(values, nulls) - .with_timezone_opt(is_utc.then(|| "+00:00")), + .with_timezone_opt(tz.as_ref().map(|tz| tz.to_string())), ), - Self::TimestampMicros(is_utc, values) => Arc::new( + Self::TimestampMicros(tz, values) => Arc::new( flush_primitive::(values, nulls) - .with_timezone_opt(is_utc.then(|| "+00:00")), + .with_timezone_opt(tz.as_ref().map(|tz| tz.to_string())), ), - Self::TimestampNanos(is_utc, values) => Arc::new( + Self::TimestampNanos(tz, values) => Arc::new( flush_primitive::(values, nulls) - .with_timezone_opt(is_utc.then(|| "+00:00")), + .with_timezone_opt(tz.as_ref().map(|tz| tz.to_string())), ), #[cfg(feature = "avro_custom_types")] Self::DurationSecond(values) => { @@ -1533,7 +1604,7 @@ impl Decoder { let offsets = flush_offsets(offsets); Arc::new(ListArray::try_new(field.clone(), offsets, values, nulls)?) } - Self::Record(fields, encodings, _) => { + Self::Record(fields, encodings, _, _) => { let arrays = encodings .iter_mut() .map(|x| x.flush(None)) @@ -1678,6 +1749,83 @@ impl Decoder { } } +/// Runtime plan for decoding reader-side `["null", T]` types. +#[derive(Debug)] +enum NullablePlan { + /// Writer actually wrote a union (branch tag present). + ReadTag { + nullability: Nullability, + resolution: ResolutionPlan, + }, + /// Writer wrote a single (non-union) value resolved to the non-null branch + /// of the reader union; do NOT read a branch tag, but apply any resolution. + FromSingle { resolution: ResolutionPlan }, +} + +/// Runtime plan for resolving writer-reader type differences. +#[derive(Debug)] +enum ResolutionPlan { + /// Indicates that the writer's type should be promoted to the reader's type. + Promotion(Promotion), + /// Provides a default value for the field missing in the writer type. + DefaultValue(AvroLiteral), + /// Provides mapping information for resolving enums. + EnumMapping(EnumResolution), + /// Provides projection information for record fields. 
+ Record(Projector), +} + +impl ResolutionPlan { + fn try_new(decoder: &Decoder, resolution: &ResolutionInfo) -> Result { + match (decoder, resolution) { + (_, ResolutionInfo::Promotion(p)) => Ok(ResolutionPlan::Promotion(*p)), + (_, ResolutionInfo::DefaultValue(lit)) => Ok(ResolutionPlan::DefaultValue(lit.clone())), + (_, ResolutionInfo::EnumMapping(m)) => { + Ok(ResolutionPlan::EnumMapping(EnumResolution::new(m))) + } + (Decoder::Record(_, _, field_defaults, _), ResolutionInfo::Record(r)) => Ok( + ResolutionPlan::Record(ProjectorBuilder::try_new(r, field_defaults).build()?), + ), + (_, ResolutionInfo::Record(_)) => Err(AvroError::SchemaError( + "record resolution on non-record decoder".into(), + )), + (_, ResolutionInfo::Union(_)) => Err(AvroError::SchemaError( + "union variant cannot be resolved to a union type".into(), + )), + } + } +} + +#[derive(Debug)] +struct EnumResolution { + mapping: Arc<[i32]>, + default_index: i32, +} + +impl EnumResolution { + fn new(mapping: &EnumMapping) -> Self { + EnumResolution { + mapping: mapping.mapping.clone(), + default_index: mapping.default_index, + } + } + + fn resolve(&self, index: i32) -> Result { + let resolved = usize::try_from(index) + .ok() + .and_then(|idx| self.mapping.get(idx).copied()) + .filter(|&idx| idx >= 0) + .unwrap_or(self.default_index); + if resolved >= 0 { + Ok(resolved) + } else { + Err(AvroError::ParseError(format!( + "Enum symbol index {index} not resolvable and no default provided", + ))) + } + } +} + // A lookup table for resolving fields between writer and reader schemas during record projection. #[derive(Debug)] struct DispatchLookupTable { @@ -1697,11 +1845,11 @@ struct DispatchLookupTable { // - `to_reader.len() == promotion.len()` and matches the reader field count. // - If `to_reader[r] == NO_SOURCE`, `promotion[r]` is ignored. to_reader: Box<[i8]>, - // For each reader field `r`, specifies the `Promotion` to apply to the writer's value. + // For each reader field `r`, specifies the resolution to apply to the writer's value. // // This is used when a writer field's type can be promoted to a reader field's type // (e.g., `Int` to `Long`). It is ignored if `to_reader[r] == NO_SOURCE`. 
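// Illustrative example (not from the source): resolving a writer union
// ["string", "int", "boolean"] against a reader union ["null", "long"],
// only the writer's "int" branch maps to a reader branch, so
// `to_reader == [NO_SOURCE, 1, NO_SOURCE]`; only the slot for the
// writer's "int" branch carries a real plan (an int-to-long promotion),
// and the other slots are never consulted.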
- promotion: Box<[Promotion]>, + resolution: Box<[ResolutionPlan]>, } // Sentinel used in `DispatchLookupTable::to_reader` to mark @@ -1710,64 +1858,94 @@ const NO_SOURCE: i8 = -1; impl DispatchLookupTable { fn from_writer_to_reader( - promotion_map: &[Option<(usize, Promotion)>], + reader_branches: &[Decoder], + resolution_map: &[Option<(usize, ResolutionInfo)>], ) -> Result { - let mut to_reader = Vec::with_capacity(promotion_map.len()); - let mut promotion = Vec::with_capacity(promotion_map.len()); - for map in promotion_map { - match *map { - Some((idx, promo)) => { + let mut to_reader = Vec::with_capacity(resolution_map.len()); + let mut resolution = Vec::with_capacity(resolution_map.len()); + for map in resolution_map { + match map { + Some((idx, res)) => { + let idx = *idx; let idx_i8 = i8::try_from(idx).map_err(|_| { AvroError::SchemaError(format!( "Reader branch index {idx} exceeds i8 range (max {})", i8::MAX )) })?; + let plan = ResolutionPlan::try_new(&reader_branches[idx], res)?; to_reader.push(idx_i8); - promotion.push(promo); + resolution.push(plan); } None => { to_reader.push(NO_SOURCE); - promotion.push(Promotion::Direct); + resolution.push(ResolutionPlan::DefaultValue(AvroLiteral::Null)); } } } Ok(Self { to_reader: to_reader.into_boxed_slice(), - promotion: promotion.into_boxed_slice(), + resolution: resolution.into_boxed_slice(), }) } - // Resolve a writer branch index to (reader_idx, promotion) + // Resolve a writer branch index to (reader_idx, resolution) #[inline] - fn resolve(&self, writer_index: usize) -> Option<(usize, Promotion)> { + fn resolve(&self, writer_index: usize) -> Option<(usize, &ResolutionPlan)> { let reader_index = *self.to_reader.get(writer_index)?; - (reader_index >= 0).then(|| (reader_index as usize, self.promotion[writer_index])) + (reader_index >= 0).then(|| (reader_index as usize, &self.resolution[writer_index])) } } #[derive(Debug)] struct UnionDecoder { fields: UnionFields, - type_ids: Vec, - offsets: Vec, - branches: Vec, - counts: Vec, - reader_type_codes: Vec, + branches: UnionDecoderBranches, default_emit_idx: usize, null_emit_idx: usize, plan: UnionReadPlan, } +#[derive(Debug, Default)] +struct UnionDecoderBranches { + decoders: Vec, + reader_type_codes: Vec, + type_ids: Vec, + offsets: Vec, + counts: Vec, +} + +impl UnionDecoderBranches { + fn new(decoders: Vec, reader_type_codes: Vec) -> Self { + let branch_len = decoders.len().max(reader_type_codes.len()); + Self { + decoders, + reader_type_codes, + type_ids: Vec::with_capacity(DEFAULT_CAPACITY), + offsets: Vec::with_capacity(DEFAULT_CAPACITY), + counts: vec![0; branch_len], + } + } + + fn emit_to(&mut self, reader_idx: usize) -> Result<&mut Decoder, AvroError> { + let branches_len = self.decoders.len(); + let Some(reader_branch) = self.decoders.get_mut(reader_idx) else { + return Err(AvroError::ParseError(format!( + "Union branch index {reader_idx} out of range ({branches_len} branches)" + ))); + }; + self.type_ids.push(self.reader_type_codes[reader_idx]); + self.offsets.push(self.counts[reader_idx]); + self.counts[reader_idx] += 1; + Ok(reader_branch) + } +} + impl Default for UnionDecoder { fn default() -> Self { Self { fields: UnionFields::empty(), - type_ids: Vec::new(), - offsets: Vec::new(), - branches: Vec::new(), - counts: Vec::new(), - reader_type_codes: Vec::new(), + branches: Default::default(), default_emit_idx: 0, null_emit_idx: 0, plan: UnionReadPlan::Passthrough, @@ -1782,7 +1960,7 @@ enum UnionReadPlan { }, FromSingle { reader_idx: usize, - promotion: Promotion, + 
resolution: ResolutionPlan, }, ToSingle { target: Box, @@ -1791,6 +1969,47 @@ enum UnionReadPlan { Passthrough, } +impl UnionReadPlan { + fn from_resolved( + reader_branches: &[Decoder], + resolved: Option, + ) -> Result { + let Some(info) = resolved else { + return Ok(Self::Passthrough); + }; + match (info.writer_is_union, info.reader_is_union) { + (true, true) => { + let lookup_table = + DispatchLookupTable::from_writer_to_reader(reader_branches, &info.writer_to_reader)?; + Ok(Self::ReaderUnion { lookup_table }) + } + (false, true) => { + let Some((idx, resolution)) = + info.writer_to_reader.first().and_then(Option::as_ref) + else { + return Err(AvroError::SchemaError( + "Writer type does not match any reader union branch".to_string(), + )); + }; + let reader_idx = *idx; + Ok(Self::FromSingle { + reader_idx, + resolution: ResolutionPlan::try_new(&reader_branches[reader_idx], resolution)?, + }) + } + (true, false) => Err(AvroError::InvalidArgument( + "UnionDecoder::try_new cannot build writer-union to single; use UnionDecoderBuilder with a target" + .to_string(), + )), + // (false, false) is invalid and should never be constructed by the resolver. + _ => Err(AvroError::SchemaError( + "ResolvedUnion constructed for non-union sides; resolver should return None" + .to_string(), + )), + } + } +} + impl UnionDecoder { fn try_new( fields: UnionFields, @@ -1801,7 +2020,6 @@ impl UnionDecoder { let null_branch = branches.iter().position(|b| matches!(b, Decoder::Null(_))); let default_emit_idx = 0; let null_emit_idx = null_branch.unwrap_or(default_emit_idx); - let branch_len = branches.len().max(reader_type_codes.len()); // Guard against impractically large unions that cannot be indexed by an Avro int let max_addr = (i32::MAX as usize) + 1; if branches.len() > max_addr { @@ -1812,26 +2030,23 @@ impl UnionDecoder { i32::MAX ))); } + let plan = UnionReadPlan::from_resolved(&branches, resolved)?; Ok(Self { fields, - type_ids: Vec::with_capacity(DEFAULT_CAPACITY), - offsets: Vec::with_capacity(DEFAULT_CAPACITY), - branches, - counts: vec![0; branch_len], - reader_type_codes, + branches: UnionDecoderBranches::new(branches, reader_type_codes), default_emit_idx, null_emit_idx, - plan: Self::plan_from_resolved(resolved)?, + plan, }) } - fn try_new_from_writer_union( - info: ResolvedUnion, - target: Box, - ) -> Result { + fn with_single_target(target: Decoder, info: ResolvedUnion) -> Result { // This constructor is only for writer-union to single-type resolution debug_assert!(info.writer_is_union && !info.reader_is_union); - let lookup_table = DispatchLookupTable::from_writer_to_reader(&info.writer_to_reader)?; + let mut reader_branches = [target]; + let lookup_table = + DispatchLookupTable::from_writer_to_reader(&reader_branches, &info.writer_to_reader)?; + let target = Box::new(mem::replace(&mut reader_branches[0], Decoder::Null(0))); Ok(Self { plan: UnionReadPlan::ToSingle { target, @@ -1841,41 +2056,6 @@ impl UnionDecoder { }) } - fn plan_from_resolved(resolved: Option) -> Result { - let Some(info) = resolved else { - return Ok(UnionReadPlan::Passthrough); - }; - match (info.writer_is_union, info.reader_is_union) { - (true, true) => { - let lookup_table = - DispatchLookupTable::from_writer_to_reader(&info.writer_to_reader)?; - Ok(UnionReadPlan::ReaderUnion { lookup_table }) - } - (false, true) => { - let Some(&(reader_idx, promotion)) = - info.writer_to_reader.first().and_then(Option::as_ref) - else { - return Err(AvroError::SchemaError( - "Writer type does not match any reader union 
branch".to_string(), - )); - }; - Ok(UnionReadPlan::FromSingle { - reader_idx, - promotion, - }) - } - (true, false) => Err(AvroError::InvalidArgument( - "UnionDecoder::try_new cannot build writer-union to single; use UnionDecoderBuilder with a target" - .to_string(), - )), - // (false, false) is invalid and should never be constructed by the resolver. - _ => Err(AvroError::SchemaError( - "ResolvedUnion constructed for non-union sides; resolver should return None" - .to_string(), - )), - } - } - #[inline] fn read_tag(buf: &mut AvroCursor<'_>) -> Result { // Avro unions are encoded by first writing the zero-based branch index. @@ -1896,20 +2076,6 @@ impl UnionDecoder { }) } - #[inline] - fn emit_to(&mut self, reader_idx: usize) -> Result<&mut Decoder, AvroError> { - let branches_len = self.branches.len(); - let Some(reader_branch) = self.branches.get_mut(reader_idx) else { - return Err(AvroError::ParseError(format!( - "Union branch index {reader_idx} out of range ({branches_len} branches)" - ))); - }; - self.type_ids.push(self.reader_type_codes[reader_idx]); - self.offsets.push(self.counts[reader_idx]); - self.counts[reader_idx] += 1; - Ok(reader_branch) - } - #[inline] fn on_decoder(&mut self, fallback_idx: usize, action: F) -> Result<(), AvroError> where @@ -1922,7 +2088,7 @@ impl UnionDecoder { UnionReadPlan::FromSingle { reader_idx, .. } => *reader_idx, _ => fallback_idx, }; - self.emit_to(reader_idx).and_then(action) + self.branches.emit_to(reader_idx).and_then(action) } fn append_null(&mut self) -> Result<(), AvroError> { @@ -1934,35 +2100,42 @@ impl UnionDecoder { } fn decode(&mut self, buf: &mut AvroCursor<'_>) -> Result<(), AvroError> { - let (reader_idx, promotion) = match &mut self.plan { - UnionReadPlan::Passthrough => (Self::read_tag(buf)?, Promotion::Direct), + match &mut self.plan { + UnionReadPlan::Passthrough => { + let reader_idx = Self::read_tag(buf)?; + let decoder = self.branches.emit_to(reader_idx)?; + decoder.decode(buf) + } UnionReadPlan::ReaderUnion { lookup_table } => { let idx = Self::read_tag(buf)?; - lookup_table.resolve(idx).ok_or_else(|| { - AvroError::ParseError(format!( + let Some((reader_idx, resolution)) = lookup_table.resolve(idx) else { + return Err(AvroError::ParseError(format!( "Union branch index {idx} not resolvable by reader schema" - )) - })? 
+ ))); + }; + let decoder = self.branches.emit_to(reader_idx)?; + decoder.decode_with_resolution(buf, resolution) } UnionReadPlan::FromSingle { reader_idx, - promotion, - } => (*reader_idx, *promotion), + resolution, + } => { + let decoder = self.branches.emit_to(*reader_idx)?; + decoder.decode_with_resolution(buf, resolution) + } UnionReadPlan::ToSingle { target, lookup_table, } => { let idx = Self::read_tag(buf)?; - return match lookup_table.resolve(idx) { - Some((_, promotion)) => target.decode_with_promotion(buf, promotion), - None => Err(AvroError::ParseError(format!( - "Writer union branch {idx} does not resolve to reader type" - ))), + let Some((_, resolution)) = lookup_table.resolve(idx) else { + return Err(AvroError::ParseError(format!( + "Writer union branch index {idx} not resolvable by reader schema" + ))); }; + target.decode_with_resolution(buf, resolution) } - }; - let decoder = self.emit_to(reader_idx)?; - decoder.decode_with_promotion(buf, promotion) + } } fn flush(&mut self, nulls: Option) -> Result { @@ -1976,13 +2149,20 @@ impl UnionDecoder { ); let children = self .branches + .decoders .iter_mut() .map(|d| d.flush(None)) .collect::, _>>()?; let arr = UnionArray::try_new( self.fields.clone(), - flush_values(&mut self.type_ids).into_iter().collect(), - Some(flush_values(&mut self.offsets).into_iter().collect()), + flush_values(&mut self.branches.type_ids) + .into_iter() + .collect(), + Some( + flush_values(&mut self.branches.offsets) + .into_iter() + .collect(), + ), children, ) .map_err(|e| AvroError::ParseError(e.to_string()))?; @@ -1995,7 +2175,7 @@ struct UnionDecoderBuilder { fields: Option, branches: Option>, resolved: Option, - target: Option>, + target: Option, } impl UnionDecoderBuilder { @@ -2018,7 +2198,7 @@ impl UnionDecoderBuilder { self } - fn with_target(mut self, target: Box) -> Self { + fn with_target(mut self, target: Decoder) -> Self { self.target = Some(target); self } @@ -2031,7 +2211,7 @@ impl UnionDecoderBuilder { (Some(info), None, None, Some(target)) if info.writer_is_union && !info.reader_is_union => { - UnionDecoder::try_new_from_writer_union(info, target) + UnionDecoder::with_single_target(target, info) } _ => Err(AvroError::InvalidArgument( "Invalid UnionDecoderBuilder configuration: expected either \ @@ -2236,108 +2416,73 @@ fn values_equal_at(arr: &dyn Array, i: usize, j: usize) -> bool { #[derive(Debug)] struct Projector { - writer_to_reader: Arc<[Option]>, - skip_decoders: Vec>, - field_defaults: Vec>, + writer_projections: Vec, default_injections: Arc<[(usize, AvroLiteral)]>, } +#[derive(Debug)] +enum FieldProjection { + ToReader(usize), + Skip(Skipper), +} + #[derive(Debug)] struct ProjectorBuilder<'a> { rec: &'a ResolvedRecord, - reader_fields: Arc<[AvroField]>, + field_defaults: &'a [Option], } impl<'a> ProjectorBuilder<'a> { #[inline] - fn try_new(rec: &'a ResolvedRecord, reader_fields: &Arc<[AvroField]>) -> Self { + fn try_new(rec: &'a ResolvedRecord, field_defaults: &'a [Option]) -> Self { Self { rec, - reader_fields: reader_fields.clone(), + field_defaults, } } #[inline] fn build(self) -> Result { - let reader_fields = self.reader_fields; - let mut field_defaults: Vec> = Vec::with_capacity(reader_fields.len()); - for avro_field in reader_fields.as_ref() { - if let Some(ResolutionInfo::DefaultValue(lit)) = - avro_field.data_type().resolution.as_ref() - { - field_defaults.push(Some(lit.clone())); - } else { - field_defaults.push(None); - } - } let mut default_injections: Vec<(usize, AvroLiteral)> = 
Vec::with_capacity(self.rec.default_fields.len()); for &idx in self.rec.default_fields.as_ref() { - let lit = field_defaults + let lit = self + .field_defaults .get(idx) .and_then(|lit| lit.clone()) .unwrap_or(AvroLiteral::Null); default_injections.push((idx, lit)); } - let mut skip_decoders: Vec> = - Vec::with_capacity(self.rec.skip_fields.len()); - for datatype in self.rec.skip_fields.as_ref() { - let skipper = match datatype { - Some(datatype) => Some(Skipper::from_avro(datatype)?), - None => None, - }; - skip_decoders.push(skipper); - } + let writer_projections = self + .rec + .writer_fields + .iter() + .map(|field| match field { + ResolvedField::ToReader(index) => Ok(FieldProjection::ToReader(*index)), + ResolvedField::Skip(datatype) => { + let skipper = Skipper::from_avro(datatype)?; + Ok(FieldProjection::Skip(skipper)) + } + }) + .collect::>()?; Ok(Projector { - writer_to_reader: self.rec.writer_to_reader.clone(), - skip_decoders, - field_defaults, + writer_projections, default_injections: default_injections.into(), }) } } impl Projector { - #[inline] - fn project_default(&self, decoder: &mut Decoder, index: usize) -> Result<(), AvroError> { - // SAFETY: `index` is obtained by listing the reader's record fields (i.e., from - // `decoders.iter_mut().enumerate()`), and `field_defaults` was built in - // `ProjectorBuilder::build` to have exactly one element per reader field. - // Therefore, `index < self.field_defaults.len()` always holds here, so - // `self.field_defaults[index]` cannot panic. We only take an immutable reference - // via `.as_ref()`, and `self` is borrowed immutably. - if let Some(default_literal) = self.field_defaults[index].as_ref() { - decoder.append_default(default_literal) - } else { - decoder.append_null() - } - } - #[inline] fn project_record( - &mut self, + &self, buf: &mut AvroCursor<'_>, encodings: &mut [Decoder], ) -> Result<(), AvroError> { - debug_assert_eq!( - self.writer_to_reader.len(), - self.skip_decoders.len(), - "internal invariant: mapping and skipper lists must have equal length" - ); - for (i, (mapping, skipper_opt)) in self - .writer_to_reader - .iter() - .zip(self.skip_decoders.iter_mut()) - .enumerate() - { - match (mapping, skipper_opt.as_mut()) { - (Some(reader_index), _) => encodings[*reader_index].decode(buf)?, - (None, Some(skipper)) => skipper.skip(buf)?, - (None, None) => { - return Err(AvroError::SchemaError(format!( - "No skipper available for writer-only field at index {i}", - ))); - } + for field_proj in self.writer_projections.iter() { + match field_proj { + FieldProjection::ToReader(index) => encodings[*index].decode(buf)?, + FieldProjection::Skip(skipper) => skipper.skip(buf)?, } } for (reader_index, lit) in self.default_injections.as_ref() { @@ -2459,7 +2604,7 @@ impl Skipper { Ok(base) } - fn skip(&mut self, buf: &mut AvroCursor<'_>) -> Result<(), AvroError> { + fn skip(&self, buf: &mut AvroCursor<'_>) -> Result<(), AvroError> { match self { Self::Null => Ok(()), Self::Boolean => { @@ -2522,7 +2667,7 @@ impl Skipper { Ok(()) } Self::Struct(fields) => { - for f in fields.iter_mut() { + for f in fields.iter() { f.skip(buf)? 
} Ok(()) @@ -2541,7 +2686,7 @@ impl Skipper { (usize::BITS as usize) )) })?; - let Some(encoding) = encodings.get_mut(idx) else { + let Some(encoding) = encodings.get(idx) else { return Err(AvroError::ParseError(format!( "Union branch index {idx} out of range for skipper ({} branches)", encodings.len() @@ -3488,10 +3633,12 @@ mod tests { let dt = avro_from_codec(Codec::Decimal(4, Some(1), None)); let inner = Decoder::try_new(&dt).unwrap(); let mut decoder = Decoder::Nullable( - Nullability::NullSecond, + NullablePlan::ReadTag { + nullability: Nullability::NullSecond, + resolution: ResolutionPlan::Promotion(Promotion::Direct), + }, NullBufferBuilder::new(DEFAULT_CAPACITY), Box::new(inner), - NullablePlan::ReadTag, ); let mut data = Vec::new(); data.extend_from_slice(&encode_avro_int(0)); @@ -3531,10 +3678,12 @@ mod tests { let dt = avro_from_codec(Codec::Decimal(6, Some(2), Some(16))); let inner = Decoder::try_new(&dt).unwrap(); let mut decoder = Decoder::Nullable( - Nullability::NullSecond, + NullablePlan::ReadTag { + nullability: Nullability::NullSecond, + resolution: ResolutionPlan::Promotion(Promotion::Direct), + }, NullBufferBuilder::new(DEFAULT_CAPACITY), Box::new(inner), - NullablePlan::ReadTag, ); let row1 = [ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, @@ -3971,8 +4120,7 @@ mod tests { fn make_record_resolved_decoder( reader_fields: &[(&str, DataType, bool)], - writer_to_reader: Vec>, - skip_decoders: Vec>, + writer_projections: Vec, ) -> Decoder { let mut field_refs: Vec = Vec::with_capacity(reader_fields.len()); let mut encodings: Vec = Vec::with_capacity(reader_fields.len()); @@ -3992,10 +4140,9 @@ mod tests { Decoder::Record( fields, encodings, + vec![None; reader_fields.len()], Some(Projector { - writer_to_reader: Arc::from(writer_to_reader), - skip_decoders, - field_defaults: vec![None; reader_fields.len()], + writer_projections, default_injections: Arc::from(Vec::<(usize, AvroLiteral)>::new()), }), ) @@ -4005,8 +4152,10 @@ mod tests { fn test_skip_writer_trailing_field_int32() { let mut dec = make_record_resolved_decoder( &[("id", arrow_schema::DataType::Int32, false)], - vec![Some(0), None], - vec![None, Some(super::Skipper::Int32)], + vec![ + FieldProjection::ToReader(0), + FieldProjection::Skip(super::Skipper::Int32), + ], ); let mut data = Vec::new(); data.extend_from_slice(&encode_avro_int(7)); @@ -4033,8 +4182,11 @@ mod tests { ("id", DataType::Int32, false), ("score", DataType::Int64, false), ], - vec![Some(0), None, Some(1)], - vec![None, Some(Skipper::String), None], + vec![ + FieldProjection::ToReader(0), + FieldProjection::Skip(Skipper::String), + FieldProjection::ToReader(1), + ], ); let mut data = Vec::new(); data.extend_from_slice(&encode_avro_int(42)); @@ -4065,8 +4217,10 @@ mod tests { fn test_skip_writer_array_with_negative_block_count_fast() { let mut dec = make_record_resolved_decoder( &[("id", DataType::Int32, false)], - vec![None, Some(0)], - vec![Some(super::Skipper::List(Box::new(Skipper::Int32))), None], + vec![ + FieldProjection::Skip(super::Skipper::List(Box::new(Skipper::Int32))), + FieldProjection::ToReader(0), + ], ); let mut array_payload = Vec::new(); array_payload.extend_from_slice(&encode_avro_int(1)); @@ -4097,8 +4251,10 @@ mod tests { fn test_skip_writer_map_with_negative_block_count_fast() { let mut dec = make_record_resolved_decoder( &[("id", DataType::Int32, false)], - vec![None, Some(0)], - vec![Some(Skipper::Map(Box::new(Skipper::Int32))), None], + vec![ + 
FieldProjection::Skip(Skipper::Map(Box::new(Skipper::Int32))), + FieldProjection::ToReader(0), + ], ); let mut entries = Vec::new(); entries.extend_from_slice(&encode_avro_bytes(b"k1")); @@ -4130,13 +4286,12 @@ mod tests { fn test_skip_writer_nullable_field_union_nullfirst() { let mut dec = make_record_resolved_decoder( &[("id", DataType::Int32, false)], - vec![None, Some(0)], vec![ - Some(super::Skipper::Nullable( + FieldProjection::Skip(super::Skipper::Nullable( Nullability::NullFirst, Box::new(super::Skipper::Int32), )), - None, + FieldProjection::ToReader(0), ], ); let mut row1 = Vec::new(); @@ -4346,7 +4501,6 @@ mod tests { reader_fields: &[(&str, DataType, bool)], field_defaults: Vec>, default_injections: Vec<(usize, AvroLiteral)>, - writer_to_reader_len: usize, ) -> Decoder { assert_eq!( field_defaults.len(), @@ -4369,15 +4523,11 @@ mod tests { encodings.push(enc); } let fields: Fields = field_refs.into(); - let skip_decoders: Vec> = - (0..writer_to_reader_len).map(|_| None::).collect(); let projector = Projector { - writer_to_reader: Arc::from(vec![None; writer_to_reader_len]), - skip_decoders, - field_defaults, + writer_projections: vec![], default_injections: Arc::from(default_injections), }; - Decoder::Record(fields, encodings, Some(projector)) + Decoder::Record(fields, encodings, field_defaults, Some(projector)) } #[cfg(feature = "avro_custom_types")] @@ -4631,10 +4781,12 @@ mod tests { fn test_default_append_nullable_int32_null_and_value() { let inner = Decoder::Int32(Vec::with_capacity(DEFAULT_CAPACITY)); let mut dec = Decoder::Nullable( - Nullability::NullFirst, + NullablePlan::ReadTag { + nullability: Nullability::NullFirst, + resolution: ResolutionPlan::Promotion(Promotion::Direct), + }, NullBufferBuilder::new(DEFAULT_CAPACITY), Box::new(inner), - NullablePlan::ReadTag, ); dec.append_default(&AvroLiteral::Null).unwrap(); dec.append_default(&AvroLiteral::Int(11)).unwrap(); @@ -4821,7 +4973,6 @@ mod tests { &[("a", DataType::Int32, false), ("b", DataType::Utf8, false)], field_defaults, vec![], - 0, ); let mut map: IndexMap = IndexMap::new(); map.insert("a".to_string(), AvroLiteral::Int(7)); @@ -4854,7 +5005,6 @@ mod tests { &[("a", DataType::Int32, false), ("b", DataType::Utf8, false)], field_defaults, vec![], - 0, ); rec.append_default(&AvroLiteral::Null).unwrap(); let arr = rec.flush(None).unwrap(); @@ -4885,29 +5035,32 @@ mod tests { field_refs.push(Arc::new(ArrowField::new(*name, dt.clone(), *nullable))); } let enc_a = Decoder::Nullable( - Nullability::NullSecond, + NullablePlan::ReadTag { + nullability: Nullability::NullSecond, + resolution: ResolutionPlan::Promotion(Promotion::Direct), + }, NullBufferBuilder::new(DEFAULT_CAPACITY), Box::new(Decoder::Int32(Vec::with_capacity(DEFAULT_CAPACITY))), - NullablePlan::ReadTag, ); let enc_b = Decoder::Nullable( - Nullability::NullSecond, + NullablePlan::ReadTag { + nullability: Nullability::NullSecond, + resolution: ResolutionPlan::Promotion(Promotion::Direct), + }, NullBufferBuilder::new(DEFAULT_CAPACITY), Box::new(Decoder::String( OffsetBufferBuilder::new(DEFAULT_CAPACITY), Vec::with_capacity(DEFAULT_CAPACITY), )), - NullablePlan::ReadTag, ); encoders.push(enc_a); encoders.push(enc_b); + let field_defaults = vec![None, None]; // no defaults -> append_null let projector = Projector { - writer_to_reader: Arc::from(vec![]), - skip_decoders: vec![], - field_defaults: vec![None, None], // no defaults -> append_null + writer_projections: vec![], default_injections: Arc::from(Vec::<(usize, AvroLiteral)>::new()), }; - let mut rec = 
Decoder::Record(field_refs.into(), encoders, Some(projector)); + let mut rec = Decoder::Record(field_refs.into(), encoders, field_defaults, Some(projector)); let mut map: IndexMap = IndexMap::new(); map.insert("a".to_string(), AvroLiteral::Int(9)); rec.append_default(&AvroLiteral::Map(map)).unwrap(); @@ -4944,7 +5097,6 @@ mod tests { ], defaults, injections, - 0, ); rec.decode(&mut AvroCursor::new(&[])).unwrap(); let arr = rec.flush(None).unwrap(); @@ -5034,7 +5186,7 @@ mod tests { Codec::DurationSeconds, ] { let dt = make_avro_dt(codec.clone(), None); - let mut s = Skipper::from_avro(&dt)?; + let s = Skipper::from_avro(&dt)?; for &v in &values { let bytes = encode_avro_long(v); let mut cursor = AvroCursor::new(&bytes); @@ -5055,7 +5207,7 @@ mod tests { #[test] fn skipper_nullable_custom_duration_respects_null_first() -> Result<(), AvroError> { let dt = make_avro_dt(Codec::DurationNanos, Some(Nullability::NullFirst)); - let mut s = Skipper::from_avro(&dt)?; + let s = Skipper::from_avro(&dt)?; match &s { Skipper::Nullable(Nullability::NullFirst, inner) => match **inner { Skipper::Int64 => {} @@ -5084,7 +5236,7 @@ mod tests { #[test] fn skipper_nullable_custom_duration_respects_null_second() -> Result<(), AvroError> { let dt = make_avro_dt(Codec::DurationMicros, Some(Nullability::NullSecond)); - let mut s = Skipper::from_avro(&dt)?; + let s = Skipper::from_avro(&dt)?; match &s { Skipper::Nullable(Nullability::NullSecond, inner) => match **inner { Skipper::Int64 => {} @@ -5115,7 +5267,7 @@ mod tests { #[test] fn skipper_interval_is_fixed12_and_skips_12_bytes() -> Result<(), AvroError> { let dt = make_avro_dt(Codec::Interval, None); - let mut s = Skipper::from_avro(&dt)?; + let s = Skipper::from_avro(&dt)?; match s { Skipper::DurationFixed12 => {} other => panic!("expected DurationFixed12, got {:?}", other), @@ -5227,12 +5379,11 @@ mod tests { Box::new(inner_values), ); let mut dec = Decoder::Nullable( - Nullability::NullSecond, - NullBufferBuilder::new(DEFAULT_CAPACITY), - Box::new(ree), NullablePlan::FromSingle { - promotion: Promotion::IntToDouble, + resolution: ResolutionPlan::Promotion(Promotion::IntToDouble), }, + NullBufferBuilder::new(DEFAULT_CAPACITY), + Box::new(ree), ); for v in [1, 1, 2, 2, 2] { let bytes = encode_avro_int(v); @@ -5347,8 +5498,8 @@ mod tests { } #[test] - fn test_timestamp_nanos_decoding_utc() { - let avro_type = avro_from_codec(Codec::TimestampNanos(true)); + fn test_timestamp_nanos_decoding_offset_zero() { + let avro_type = avro_from_codec(Codec::TimestampNanos(Some(Tz::OffsetZero))); let mut decoder = Decoder::try_new(&avro_type).expect("create TimestampNanos decoder"); let mut data = Vec::new(); for v in [0_i64, 1_i64, -1_i64, 1_234_567_890_i64] { @@ -5372,9 +5523,35 @@ mod tests { } } + #[test] + fn test_timestamp_nanos_decoding_utc() { + let avro_type = avro_from_codec(Codec::TimestampNanos(Some(Tz::Utc))); + let mut decoder = Decoder::try_new(&avro_type).expect("create TimestampNanos decoder"); + let mut data = Vec::new(); + for v in [0_i64, 1_i64, -1_i64, 1_234_567_890_i64] { + data.extend_from_slice(&encode_avro_long(v)); + } + let mut cur = AvroCursor::new(&data); + for _ in 0..4 { + decoder.decode(&mut cur).expect("decode nanos ts"); + } + let array = decoder.flush(None).expect("flush nanos ts"); + let ts = array + .as_any() + .downcast_ref::() + .expect("TimestampNanosecondArray"); + assert_eq!(ts.values(), &[0, 1, -1, 1_234_567_890]); + match ts.data_type() { + DataType::Timestamp(arrow_schema::TimeUnit::Nanosecond, tz) => { + assert_eq!(tz.as_deref(), 
Some("UTC")); + } + other => panic!("expected Timestamp(Nanosecond, Some(\"UTC\")), got {other:?}"), + } + } + #[test] fn test_timestamp_nanos_decoding_local() { - let avro_type = avro_from_codec(Codec::TimestampNanos(false)); + let avro_type = avro_from_codec(Codec::TimestampNanos(None)); let mut decoder = Decoder::try_new(&avro_type).expect("create TimestampNanos decoder"); let mut data = Vec::new(); for v in [10_i64, 20_i64, -30_i64] { @@ -5401,7 +5578,7 @@ mod tests { #[test] fn test_timestamp_nanos_decoding_with_nulls() { let avro_type = AvroDataType::new( - Codec::TimestampNanos(false), + Codec::TimestampNanos(None), Default::default(), Some(Nullability::NullFirst), ); diff --git a/arrow-avro/src/schema.rs b/arrow-avro/src/schema.rs index 90c0d5a1648d..1b0c2e26f773 100644 --- a/arrow-avro/src/schema.rs +++ b/arrow-avro/src/schema.rs @@ -78,6 +78,16 @@ pub(crate) enum Nullability { NullSecond, } +impl Nullability { + /// Returns the index of the non-null variant in the union. + pub(crate) fn non_null_index(&self) -> usize { + match self { + Nullability::NullFirst => 1, + Nullability::NullSecond => 0, + } + } +} + /// Either a [`PrimitiveType`] or a reference to a previously defined named type /// /// @@ -3331,7 +3341,11 @@ mod tests { false, )])), false, - ); + ) + .with_metadata(HashMap::from_iter([( + "avro.name".to_owned(), + "R".to_owned(), + )])); assert_eq!(resolved.field(), expected); } @@ -3393,7 +3407,11 @@ mod tests { false, )])), false, - ); + ) + .with_metadata(HashMap::from_iter([( + "avro.name".to_owned(), + "R".to_owned(), + )])); assert_eq!(resolved.field(), expected); } @@ -3430,7 +3448,11 @@ mod tests { )])), ])), false, - ); + ) + .with_metadata(HashMap::from_iter([( + "avro.name".to_owned(), + "R".to_owned(), + )])); assert_eq!(resolved.field(), expected); } diff --git a/arrow-buffer/Cargo.toml b/arrow-buffer/Cargo.toml index 02ea49c37c46..1400c1986361 100644 --- a/arrow-buffer/Cargo.toml +++ b/arrow-buffer/Cargo.toml @@ -36,6 +36,7 @@ bench = false all-features = true [features] +# Enable memory tracking support pool = [] [dependencies] diff --git a/arrow-buffer/src/buffer/boolean.rs b/arrow-buffer/src/buffer/boolean.rs index f9148c7eb245..420bbf59f3be 100644 --- a/arrow-buffer/src/buffer/boolean.rs +++ b/arrow-buffer/src/buffer/boolean.rs @@ -23,7 +23,7 @@ use crate::{ buffer_bin_xor, }; -use std::ops::{BitAnd, BitOr, BitXor, Not}; +use std::ops::{BitAnd, BitAndAssign, BitOr, BitOrAssign, BitXor, BitXorAssign, Not}; /// A slice-able [`Buffer`] containing bit-packed booleans /// @@ -67,6 +67,27 @@ use std::ops::{BitAnd, BitOr, BitXor, Not}; /// Note that the bits marked `?` are not logically part of the mask and may /// contain either `0` or `1` /// +/// # Bitwise Operations +/// +/// `BooleanBuffer` implements the standard bitwise traits for creating a new +/// buffer ([`BitAnd`], [`BitOr`], [`BitXor`], [`Not`]) as well as the assign variants +/// for updating an existing buffer in place when possible ([`BitAndAssign`], +/// [`BitOrAssign`], [`BitXorAssign`]). 
+/// +/// ``` +/// # use arrow_buffer::BooleanBuffer; +/// let mut left = BooleanBuffer::from(&[true, false, true, true] as &[bool]); +/// let right = BooleanBuffer::from(&[true, true, false, true] as &[bool]); +/// +/// // Create a new buffer by applying bitwise AND +/// let anded = &left & &right; +/// assert_eq!(anded, BooleanBuffer::from(&[true, false, false, true] as &[bool])); +/// +/// // Update `left` in place by applying bitwise AND in place +/// left &= &right; +/// assert_eq!(left, BooleanBuffer::from(&[true, false, false, true] as &[bool])); +/// ``` +/// /// # See Also /// * [`BooleanBufferBuilder`] for building [`BooleanBuffer`] instances /// * [`NullBuffer`] for representing null values in Arrow arrays @@ -269,7 +290,8 @@ impl BooleanBuffer { /// on the relevant bits; the input `u64` values may contain irrelevant bits /// and may be processed differently on different endian architectures. /// * `op` may be called with input bits outside the requested range. - /// * The returned `BooleanBuffer` always has zero offset. + /// * Returned `BooleanBuffer` may have non zero offset + /// * Returned `BooleanBuffer` may have bits set outside the requested range /// /// # See Also /// - [`BooleanBuffer::from_bitwise_unary_op`] for unary operations on a single input buffer. @@ -284,19 +306,28 @@ impl BooleanBuffer { /// let result = BooleanBuffer::from_bitwise_binary_op( /// &left, 0, &right, 0, 12, |a, b| a & b /// ); - /// assert_eq!(result.inner().as_slice(), &[0b10001000u8, 0b00001000u8]); + /// assert_eq!(result.len(), 12); + /// for i in 0..12 { + /// assert_eq!(result.value(i), left.as_slice()[i / 8] >> (i % 8) & 1 == 1 + /// && right.as_slice()[i / 8] >> (i % 8) & 1 == 1); + /// } /// ``` /// /// # Example: Create new [`BooleanBuffer`] from bitwise `OR` of two byte slices /// ``` - /// # use arrow_buffer::BooleanBuffer; + /// # use arrow_buffer::{BooleanBuffer, bit_util}; /// let left = [0b11001100u8, 0b10111010u8]; /// let right = [0b10101010u8, 0b11011100u8]; /// // OR of bits 4..16 from left and bits 0..12 from right /// let result = BooleanBuffer::from_bitwise_binary_op( /// &left, 4, &right, 0, 12, |a, b| a | b /// ); - /// assert_eq!(result.inner().as_slice(), &[0b10101110u8, 0b00001111u8]); + /// assert_eq!(result.len(), 12); + /// for i in 0..12 { + /// let l = bit_util::get_bit(&left, 4 + i); + /// let r = bit_util::get_bit(&right, i); + /// assert_eq!(result.value(i), l | r); + /// } /// ``` pub fn from_bitwise_binary_op( left: impl AsRef<[u8]>, @@ -311,39 +342,74 @@ impl BooleanBuffer { { let left = left.as_ref(); let right = right.as_ref(); - // try fast path for aligned input - // If the underlying buffers are aligned to u64 we can apply the operation directly on the u64 slices - // to improve performance. - if left_offset_in_bits & 0x7 == 0 && right_offset_in_bits & 0x7 == 0 { - // align to byte boundary - let left = &left[left_offset_in_bits / 8..]; - let right = &right[right_offset_in_bits / 8..]; - - unsafe { - let (left_prefix, left_u64s, left_suffix) = left.align_to::(); - let (right_prefix, right_u64s, right_suffix) = right.align_to::(); - // if there is no prefix or suffix, both buffers are aligned and - // we can do the operation directly on u64s. - // TODO: consider `slice::as_chunks` and `u64::from_le_bytes` when MSRV reaches 1.88. 
-                // https://github.com/apache/arrow-rs/pull/9022#discussion_r2639949361
-                if left_prefix.is_empty()
-                    && right_prefix.is_empty()
-                    && left_suffix.is_empty()
-                    && right_suffix.is_empty()
-                {
-                    let result_u64s = left_u64s
+
+        // When both offsets share the same sub-64-bit alignment, we can
+        // align both to 64-bit boundaries and zip u64s directly,
+        // avoiding BitChunks bit-shifting entirely.
+        if left_offset_in_bits % 64 == right_offset_in_bits % 64 {
+            let bit_offset = left_offset_in_bits % 64;
+            let left_end = left_offset_in_bits + len_in_bits;
+            let right_end = right_offset_in_bits + len_in_bits;
+
+            let left_aligned = left_offset_in_bits & !63;
+            let right_aligned = right_offset_in_bits & !63;
+
+            let left_end_bytes = (bit_util::ceil(left_end, 64) * 8).min(left.len());
+            let right_end_bytes = (bit_util::ceil(right_end, 64) * 8).min(right.len());
+
+            let left_slice = &left[left_aligned / 8..left_end_bytes];
+            let right_slice = &right[right_aligned / 8..right_end_bytes];
+
+            let (lp, left_u64s, ls) = unsafe { left_slice.align_to::<u64>() };
+            let (rp, right_u64s, rs) = unsafe { right_slice.align_to::<u64>() };
+
+            match (lp, ls, rp, rs) {
+                ([], [], [], []) => {
+                    let result_u64s: Vec<u64> = left_u64s
                         .iter()
                         .zip(right_u64s.iter())
                         .map(|(l, r)| op(*l, *r))
-                        .collect::<Vec<_>>();
-                    return BooleanBuffer {
-                        buffer: Buffer::from(result_u64s),
-                        bit_offset: 0,
-                        bit_len: len_in_bits,
-                    };
+                        .collect();
+                    return BooleanBuffer::new(result_u64s.into(), bit_offset, len_in_bits);
                 }
+                ([], left_suf, [], right_suf) => {
+                    let left_iter = left_u64s
+                        .iter()
+                        .cloned()
+                        .chain((!left_suf.is_empty()).then(|| read_u64(left_suf)));
+                    let right_iter = right_u64s
+                        .iter()
+                        .cloned()
+                        .chain((!right_suf.is_empty()).then(|| read_u64(right_suf)));
+                    let result_u64s: Vec<u64> =
+                        left_iter.zip(right_iter).map(|(l, r)| op(l, r)).collect();
+                    return BooleanBuffer::new(result_u64s.into(), bit_offset, len_in_bits);
+                }
+                _ => {}
             }
+
+            // Memory not u64-aligned, use chunks_exact fallback
+            let left_chunks = left_slice.chunks_exact(8);
+            let left_rem = left_chunks.remainder();
+            let right_chunks = right_slice.chunks_exact(8);
+            let right_rem = right_chunks.remainder();
+
+            let left_iter = left_chunks.map(|c| u64::from_le_bytes(c.try_into().unwrap()));
+            let right_iter = right_chunks.map(|c| u64::from_le_bytes(c.try_into().unwrap()));
+
+            let result_u64s: Vec<u64> = if left_rem.is_empty() && right_rem.is_empty() {
+                left_iter.zip(right_iter).map(|(l, r)| op(l, r)).collect()
+            } else {
+                left_iter
+                    .chain(Some(read_u64(left_rem)))
+                    .zip(right_iter.chain(Some(read_u64(right_rem))))
+                    .map(|(l, r)| op(l, r))
+                    .collect()
+            };
+            return BooleanBuffer::new(result_u64s.into(), bit_offset, len_in_bits);
         }
+
+        // Different sub-64-bit alignments: bit-shifting unavoidable
         let left_chunks = BitChunks::new(left, left_offset_in_bits, len_in_bits);
         let right_chunks = BitChunks::new(right, right_offset_in_bits, len_in_bits);
@@ -458,7 +524,7 @@ impl BooleanBuffer {
         }
     }

-    /// Returns a [`Buffer`] containing the sliced contents of this [`BooleanBuffer`]
+    /// Returns a new [`Buffer`] containing the sliced contents of this [`BooleanBuffer`]
     ///
     /// Equivalent to `self.buffer.bit_slice(self.offset, self.len)`
     pub fn sliced(&self) -> Buffer {
@@ -489,6 +555,57 @@ impl BooleanBuffer {
         self.buffer
     }

+    /// Claim memory used by this buffer in the provided memory pool.
+    ///
+    /// See [`Buffer::claim`] for details.
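+    ///
+    /// A minimal usage sketch (`pool` is assumed to be some [`crate::MemoryPool`]
+    /// implementation in scope; this change does not provide one):
+    ///
+    /// ```ignore
+    /// // Account the bytes backing this buffer against `pool`
+    /// boolean_buffer.claim(&pool);
+    /// ```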
+    #[cfg(feature = "pool")]
+    pub fn claim(&self, pool: &dyn crate::MemoryPool) {
+        self.buffer.claim(pool);
+    }
+
+    /// Apply a bitwise binary operation to `self`.
+    ///
+    /// If the underlying buffer is uniquely owned, reuses the allocation
+    /// and updates the bytes in place. If the underlying buffer is shared,
+    /// returns a newly allocated buffer.
+    ///
+    /// # API Notes
+    ///
+    /// If the buffer is reused, the result preserves the existing offset, which
+    /// may be non-zero.
+    fn bitwise_bin_op_assign<F>(&mut self, rhs: &BooleanBuffer, op: F)
+    where
+        F: FnMut(u64, u64) -> u64,
+    {
+        assert_eq!(self.bit_len, rhs.bit_len);
+        // Try to mutate in place if the buffer is uniquely owned
+        let buffer = std::mem::take(&mut self.buffer);
+        match buffer.into_mutable() {
+            Ok(mut buf) => {
+                bit_util::apply_bitwise_binary_op(
+                    &mut buf,
+                    self.bit_offset,
+                    &rhs.buffer,
+                    rhs.bit_offset,
+                    self.bit_len,
+                    op,
+                );
+                self.buffer = buf.into();
+            }
+            Err(buf) => {
+                self.buffer = buf;
+                *self = BooleanBuffer::from_bitwise_binary_op(
+                    self.values(),
+                    self.bit_offset,
+                    rhs.values(),
+                    rhs.bit_offset,
+                    self.bit_len,
+                    op,
+                );
+            }
+        }
+    }
+
     /// Returns an iterator over the bits in this [`BooleanBuffer`]
     pub fn iter(&self) -> BitIterator<'_> {
         self.into_iter()
@@ -575,6 +692,24 @@ impl BitXor<&BooleanBuffer> for &BooleanBuffer {
     }
 }

+impl BitAndAssign<&BooleanBuffer> for BooleanBuffer {
+    fn bitand_assign(&mut self, rhs: &BooleanBuffer) {
+        self.bitwise_bin_op_assign(rhs, |a, b| a & b);
+    }
+}
+
+impl BitOrAssign<&BooleanBuffer> for BooleanBuffer {
+    fn bitor_assign(&mut self, rhs: &BooleanBuffer) {
+        self.bitwise_bin_op_assign(rhs, |a, b| a | b);
+    }
+}
+
+impl BitXorAssign<&BooleanBuffer> for BooleanBuffer {
+    fn bitxor_assign(&mut self, rhs: &BooleanBuffer) {
+        self.bitwise_bin_op_assign(rhs, |a, b| a ^ b);
+    }
+}
+
 impl<'a> IntoIterator for &'a BooleanBuffer {
     type Item = bool;
     type IntoIter = BitIterator<'a>;
@@ -721,6 +856,47 @@ mod tests {
         assert_eq!(boolean_buf1 ^ boolean_buf2, expected);
     }

+    #[test]
+    fn test_boolean_bitand_assign_shared_and_unshared() {
+        let rhs = BooleanBuffer::from(&[true, true, false, true, false, true][..]);
+        let original = BooleanBuffer::from(&[true, false, true, true, true, false][..]);
+
+        let mut unshared = BooleanBuffer::from(&[true, false, true, true, true, false][..]);
+        unshared &= &rhs;
+
+        let mut shared = original.clone();
+        let _shared_owner = shared.clone();
+        shared &= &rhs;
+
+        let expected = &original & &rhs;
+        assert_eq!(unshared, expected);
+        assert_eq!(shared, expected);
+    }
+
+    #[test]
+    fn test_boolean_bitor_assign() {
+        let rhs = BooleanBuffer::from(&[true, true, false, true, false, true][..]);
+        let original = BooleanBuffer::from(&[true, false, true, true, true, false][..]);
+
+        let mut actual = original.clone();
+        actual |= &rhs;
+
+        let expected = &original | &rhs;
+        assert_eq!(actual, expected);
+    }
+
+    #[test]
+    fn test_boolean_bitxor_assign() {
+        let rhs = BooleanBuffer::from(&[true, true, false, true, false, true][..]);
+        let original = BooleanBuffer::from(&[true, false, true, true, true, false][..]);
+
+        let mut actual = original.clone();
+        actual ^= &rhs;
+
+        let expected = &original ^ &rhs;
+        assert_eq!(actual, expected);
+    }
+
     #[test]
     fn test_boolean_not() {
         let offset = 0;
@@ -863,6 +1039,127 @@ mod tests {
         }
     }

+    #[test]
+    fn test_from_bitwise_binary_op_same_mod_64_unaligned_fallback() {
+        // Exercise the shared-alignment fast path when both inputs are misaligned in memory,
+        // forcing the chunks_exact fallback instead of align_to::<u64>().
+        let left_bytes = [
+            0,           // dropped so `&left_bytes[1..]` is not u64-aligned in memory
+            0b1101_0010, // logical left bits start at bit 3 of this byte
+            0b0110_1101,
+            0b1010_0111,
+            0b0001_1110,
+            0b1110_0001,
+            0b0101_1010,
+            0b1001_0110,
+            0b0011_1100,
+            0b1011_0001,
+            0b0100_1110,
+            0b1100_0011,
+            0b0111_1000,
+        ];
+        let right_bytes = [
+            0,           // dropped so `&right_bytes[1..]` is not u64-aligned in memory
+            0b1010_1100, // logical right bits start at bit 67 == bit 3 of the second 64-bit block
+            0b0101_0011,
+            0b1111_0000,
+            0b0011_1010,
+            0b1000_1111,
+            0b0110_0101,
+            0b1101_1000,
+            0b0001_0111,
+            0b1110_0100,
+            0b0010_1101,
+            0b1001_1010,
+            0b0111_0001,
+        ];
+
+        let left = &left_bytes[1..];
+        let right = &right_bytes[1..];
+
+        let left_offset = 3;
+        let right_offset = 67; // same mod 64 as left_offset, so this takes the shared-alignment path
+        let len = 24; // leaves a partial trailing chunk, so this covers the non-empty remainder branch
+
+        let result = BooleanBuffer::from_bitwise_binary_op(
+            left,
+            left_offset,
+            right,
+            right_offset,
+            len,
+            |a, b| a & b,
+        );
+        let expected = (0..len)
+            .map(|i| {
+                bit_util::get_bit(left, left_offset + i)
+                    & bit_util::get_bit(right, right_offset + i)
+            })
+            .collect::<BooleanBuffer>();
+
+        assert_eq!(result, expected);
+        assert_eq!(result.offset(), left_offset % 64);
+    }
+
+    #[test]
+    fn test_from_bitwise_binary_op_same_mod_64_unaligned_fallback_no_remainder() {
+        // Force the chunks_exact fallback with an exact 8-byte chunk so both remainders are empty.
+        let left_bytes = [
+            0,           // dropped so `&left_bytes[1..]` is not u64-aligned in memory
+            0b1010_1100, // logical left bits start at bit 3 of this byte
+            0b0110_1001,
+            0b1101_0011,
+            0b0001_1110,
+            0b1110_0101,
+            0b0101_1000,
+            0b1001_0111,
+            0b0011_1101,
+        ];
+        let right_bytes = [
+            0,           // dropped so `&right_bytes[1..]` is not u64-aligned in memory
+            0b0111_0010, // logical right bits start at bit 67 == bit 3 of the second 64-bit block
+            0b1010_1001,
+            0b0101_1110,
+            0b1100_0011,
+            0b0011_1011,
+            0b1000_1110,
+            0b1111_0001,
+            0b0100_1101,
+            0b1011_0110,
+            0b0001_1011,
+            0b1101_0100,
+            0b0110_0011,
+            0b1001_1110,
+            0b0010_1001,
+            0b1110_0110,
+            0b0101_0001,
+        ];
+
+        let left = &left_bytes[1..];
+        let right = &right_bytes[1..];
+
+        let left_offset = 3;
+        let right_offset = 67; // same mod 64 as left_offset, so this takes the shared-alignment path
+        let len = 61; // 3 + 61 = 64, so the aligned slices are exactly one 8-byte chunk with empty remainders
+
+        let result = BooleanBuffer::from_bitwise_binary_op(
+            left,
+            left_offset,
+            right,
+            right_offset,
+            len,
+            |a, b| a | b,
+        );
+        let expected = (0..len)
+            .map(|i| {
+                bit_util::get_bit(left, left_offset + i)
+                    | bit_util::get_bit(right, right_offset + i)
+            })
+            .collect::<BooleanBuffer>();
+
+        assert_eq!(result, expected);
+        assert_eq!(result.offset(), left_offset % 64);
+    }
+
     #[test]
     fn test_extend_trusted_len_sets_byte_len() {
         // Ensures extend_trusted_len keeps the underlying byte length in sync with bit length.
diff --git a/arrow-buffer/src/buffer/mutable.rs b/arrow-buffer/src/buffer/mutable.rs
index 9fc860506194..07ef965cb082 100644
--- a/arrow-buffer/src/buffer/mutable.rs
+++ b/arrow-buffer/src/buffer/mutable.rs
@@ -450,7 +450,13 @@ impl MutableBuffer {

     /// Clear all existing data from this buffer.
     pub fn clear(&mut self) {
-        self.len = 0
+        self.len = 0;
+        #[cfg(feature = "pool")]
+        {
+            if let Some(reservation) = self.reservation.lock().unwrap().as_mut() {
+                reservation.resize(self.len);
+            }
+        }
     }

     /// Returns the data stored in this buffer as a slice.
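// A minimal sketch of the new `clear` behavior (not part of the patch; uses
// only public APIs): the allocation is kept and the length reset, and with the
// `pool` feature any attached reservation is shrunk to zero as well, which the
// updated test in the next hunk checks via `pool.used()`.
//
//     use arrow_buffer::MutableBuffer;
//
//     let mut buffer = MutableBuffer::from_len_zeroed(40);
//     buffer.clear();
//     assert_eq!(buffer.len(), 0); // length reset; capacity retained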
@@ -1371,7 +1377,7 @@ mod tests {
         assert_eq!(pool.used(), 40);

         // Truncate to zero
-        buffer.truncate(0);
+        buffer.clear();
         assert_eq!(buffer.len(), 0);
         assert_eq!(pool.used(), 0);
     }
diff --git a/arrow-buffer/src/buffer/null.rs b/arrow-buffer/src/buffer/null.rs
index 64a21d99e830..6046369c62a7 100644
--- a/arrow-buffer/src/buffer/null.rs
+++ b/arrow-buffer/src/buffer/null.rs
@@ -26,7 +26,7 @@ use crate::{Buffer, MutableBuffer};
 /// that it is null.
 ///
 /// # See also
-/// * [`NullBufferBuilder`] for creating `NullBuffer`s 
+/// * [`NullBufferBuilder`] for creating `NullBuffer`s
 ///
 /// [Arrow specification]: https://arrow.apache.org/docs/format/Columnar.html#validity-bitmaps
 /// [`NullBufferBuilder`]: crate::NullBufferBuilder
@@ -222,6 +222,22 @@ impl NullBuffer {
     pub fn buffer(&self) -> &Buffer {
         self.buffer.inner()
     }
+
+    /// Create a [`NullBuffer`] from an *unsliced* validity bitmap (`offset = 0` **bits**) of length `len`.
+    ///
+    /// Returns `None` if there are no nulls (all values valid).
+    pub fn from_unsliced_buffer(buffer: impl Into<Buffer>, len: usize) -> Option<Self> {
+        let bb = BooleanBuffer::new(buffer.into(), 0, len);
+        let nb = NullBuffer::new(bb);
+        (nb.null_count() > 0).then_some(nb)
+    }
+
+    /// Claim memory used by this null buffer in the provided memory pool.
+    #[cfg(feature = "pool")]
+    pub fn claim(&self, pool: &dyn crate::MemoryPool) {
+        // NullBuffer wraps a BooleanBuffer which wraps a Buffer
+        self.buffer.inner().claim(pool);
+    }
 }

 impl<'a> IntoIterator for &'a NullBuffer {
@@ -266,6 +282,7 @@ impl FromIterator<bool> for NullBuffer {
 #[cfg(test)]
 mod tests {
     use super::*;
+
     #[test]
     fn test_size() {
         // This tests that the niche optimisation eliminates the overhead of an option
@@ -274,4 +291,49 @@ mod tests {
             std::mem::size_of::<Option<NullBuffer>>()
         );
     }
+
+    #[test]
+    fn test_from_unsliced_buffer_with_nulls() {
+        // 0b10110010 → null(0), valid(1), null(2), null(3), valid(4), valid(5), null(6), valid(7)
+        let buf = Buffer::from([0b10110010u8]);
+        let result = NullBuffer::from_unsliced_buffer(buf, 8);
+        assert!(result.is_some());
+        let nb = result.unwrap();
+        assert_eq!(nb.len(), 8);
+        assert_eq!(nb.null_count(), 4);
+        assert!(nb.is_null(0));
+        assert!(nb.is_valid(1));
+        assert!(nb.is_null(2));
+        assert!(nb.is_null(3));
+        assert!(nb.is_valid(4));
+        assert!(nb.is_valid(5));
+        assert!(nb.is_null(6));
+        assert!(nb.is_valid(7));
+    }
+
+    #[test]
+    fn test_from_unsliced_buffer_all_valid() {
+        // All bits set = all valid, no nulls
+        let buf = Buffer::from([0b11111111u8]);
+        let result = NullBuffer::from_unsliced_buffer(buf, 8);
+        assert!(result.is_none());
+    }
+
+    #[test]
+    fn test_from_unsliced_buffer_all_null() {
+        // No bits set = all null
+        let buf = Buffer::from([0b00000000u8]);
+        let result = NullBuffer::from_unsliced_buffer(buf, 8);
+        assert!(result.is_some());
+        let nb = result.unwrap();
+        assert_eq!(nb.len(), 8);
+        assert_eq!(nb.null_count(), 8);
+    }
+
+    #[test]
+    fn test_from_unsliced_buffer_empty() {
+        let buf = Buffer::from([]);
+        let result = NullBuffer::from_unsliced_buffer(buf, 0);
+        assert!(result.is_none());
+    }
 }
diff --git a/arrow-buffer/src/buffer/offset.rs b/arrow-buffer/src/buffer/offset.rs
index 66fa7dd22ec5..bb34c8b23892 100644
--- a/arrow-buffer/src/buffer/offset.rs
+++ b/arrow-buffer/src/buffer/offset.rs
@@ -220,6 +220,12 @@ impl<O: ArrowNativeType> OffsetBuffer<O> {
         self.0
     }

+    /// Claim memory used by this buffer in the provided memory pool.
+ #[cfg(feature = "pool")] + pub fn claim(&self, pool: &dyn crate::MemoryPool) { + self.0.claim(pool); + } + /// Returns a zero-copy slice of this buffer with length `len` and starting at `offset` pub fn slice(&self, offset: usize, len: usize) -> Self { Self(self.0.slice(offset, len.saturating_add(1))) diff --git a/arrow-buffer/src/buffer/ops.rs b/arrow-buffer/src/buffer/ops.rs index 36efe876432d..793bbaf6c2e7 100644 --- a/arrow-buffer/src/buffer/ops.rs +++ b/arrow-buffer/src/buffer/ops.rs @@ -143,6 +143,9 @@ where /// Apply a bitwise and to two inputs and return the result as a Buffer. /// The inputs are treated as bitmaps, meaning that offsets and length are specified in number of bits. +/// +/// # See Also +/// * [`BooleanBuffer::from_bitwise_binary_op`] for creating `BooleanBuffer`s directly pub fn buffer_bin_and( left: &Buffer, left_offset_in_bits: usize, @@ -150,19 +153,27 @@ pub fn buffer_bin_and( right_offset_in_bits: usize, len_in_bits: usize, ) -> Buffer { - BooleanBuffer::from_bitwise_binary_op( + let result = BooleanBuffer::from_bitwise_binary_op( left, left_offset_in_bits, right, right_offset_in_bits, len_in_bits, |a, b| a & b, - ) - .into_inner() + ); + // Normalize non-zero BooleanBuffer offsets back to a zero-offset Buffer. + if result.offset() == 0 { + result.into_inner() + } else { + result.sliced() + } } /// Apply a bitwise or to two inputs and return the result as a Buffer. /// The inputs are treated as bitmaps, meaning that offsets and length are specified in number of bits. +/// +/// # See Also +/// * [`BooleanBuffer::from_bitwise_binary_op`] for creating `BooleanBuffer`s directly pub fn buffer_bin_or( left: &Buffer, left_offset_in_bits: usize, @@ -170,19 +181,27 @@ pub fn buffer_bin_or( right_offset_in_bits: usize, len_in_bits: usize, ) -> Buffer { - BooleanBuffer::from_bitwise_binary_op( + let result = BooleanBuffer::from_bitwise_binary_op( left, left_offset_in_bits, right, right_offset_in_bits, len_in_bits, |a, b| a | b, - ) - .into_inner() + ); + // Normalize non-zero BooleanBuffer offsets back to a zero-offset Buffer. + if result.offset() == 0 { + result.into_inner() + } else { + result.sliced() + } } /// Apply a bitwise xor to two inputs and return the result as a Buffer. /// The inputs are treated as bitmaps, meaning that offsets and length are specified in number of bits. +/// +/// # See Also +/// * [`BooleanBuffer::from_bitwise_binary_op`] for creating `BooleanBuffer`s directly pub fn buffer_bin_xor( left: &Buffer, left_offset_in_bits: usize, @@ -190,19 +209,27 @@ pub fn buffer_bin_xor( right_offset_in_bits: usize, len_in_bits: usize, ) -> Buffer { - BooleanBuffer::from_bitwise_binary_op( + let result = BooleanBuffer::from_bitwise_binary_op( left, left_offset_in_bits, right, right_offset_in_bits, len_in_bits, |a, b| a ^ b, - ) - .into_inner() + ); + // Normalize non-zero BooleanBuffer offsets back to a zero-offset Buffer. + if result.offset() == 0 { + result.into_inner() + } else { + result.sliced() + } } /// Apply a bitwise and_not to two inputs and return the result as a Buffer. /// The inputs are treated as bitmaps, meaning that offsets and length are specified in number of bits. 
+///
+/// # See Also
+/// * [`BooleanBuffer::from_bitwise_binary_op`] for creating `BooleanBuffer`s directly
 pub fn buffer_bin_and_not(
     left: &Buffer,
     left_offset_in_bits: usize,
@@ -210,19 +237,70 @@ pub fn buffer_bin_and_not(
     right_offset_in_bits: usize,
     len_in_bits: usize,
 ) -> Buffer {
-    BooleanBuffer::from_bitwise_binary_op(
+    let result = BooleanBuffer::from_bitwise_binary_op(
         left,
         left_offset_in_bits,
         right,
         right_offset_in_bits,
         len_in_bits,
         |a, b| a & !b,
-    )
-    .into_inner()
+    );
+    // Normalize non-zero BooleanBuffer offsets back to a zero-offset Buffer.
+    if result.offset() == 0 {
+        result.into_inner()
+    } else {
+        result.sliced()
+    }
 }

 /// Apply a bitwise not to one input and return the result as a Buffer.
 /// The input is treated as a bitmap, meaning that offset and length are specified in number of bits.
+///
+/// # See Also
+/// * [`BooleanBuffer::from_bitwise_unary_op`] for creating `BooleanBuffer`s directly
 pub fn buffer_unary_not(left: &Buffer, offset_in_bits: usize, len_in_bits: usize) -> Buffer {
     BooleanBuffer::from_bitwise_unary_op(left, offset_in_bits, len_in_bits, |a| !a).into_inner()
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_buffer_bin_ops_return_zero_offset_buffers() {
+        let left = Buffer::from(vec![0b1010_1100, 0b0110_1001]);
+        let right = Buffer::from(vec![0, 0, 0, 0, 0, 0, 0, 0, 0b1110_0101, 0b0101_1000]);
+
+        let left_offset = 1;
+        let right_offset = 65; // same mod 64 as left_offset, so from_bitwise_binary_op returns non-zero offset
+        let len = 7;
+
+        // Reuse the same offset scenario for all four binary wrappers:
+        // each wrapper should return the logically equivalent offset-0 Buffer,
+        // even though the underlying BooleanBuffer result has offset 1.
+        for (op, wrapper) in [
+            (
+                (|a, b| a & b) as fn(u64, u64) -> u64,
+                buffer_bin_and as fn(&Buffer, usize, &Buffer, usize, usize) -> Buffer,
+            ),
+            (((|a, b| a | b) as fn(u64, u64) -> u64), buffer_bin_or),
+            (((|a, b| a ^ b) as fn(u64, u64) -> u64), buffer_bin_xor),
+            (((|a, b| a & !b) as fn(u64, u64) -> u64), buffer_bin_and_not),
+        ] {
+            let unsliced = BooleanBuffer::from_bitwise_binary_op(
+                &left,
+                left_offset,
+                &right,
+                right_offset,
+                len,
+                op,
+            );
+            assert_eq!(unsliced.offset(), 1);
+
+            let result = wrapper(&left, left_offset, &right, right_offset, len);
+
+            assert_eq!(result, unsliced.sliced());
+            assert_eq!(result.len(), 1);
+        }
+    }
+}
diff --git a/arrow-buffer/src/buffer/run.rs b/arrow-buffer/src/buffer/run.rs
index 0f4d9234e4cf..703ae913801d 100644
--- a/arrow-buffer/src/buffer/run.rs
+++ b/arrow-buffer/src/buffer/run.rs
@@ -294,6 +294,12 @@ where
         self.run_ends
     }

+    /// Claim memory used by this buffer in the provided memory pool.
+    #[cfg(feature = "pool")]
+    pub fn claim(&self, pool: &dyn crate::MemoryPool) {
+        self.run_ends.claim(pool);
+    }
+
     /// Returns the physical indices corresponding to the provided logical indices.
     ///
     /// Given a slice of logical indices, this method returns a `Vec<usize>` containing the
diff --git a/arrow-buffer/src/buffer/scalar.rs b/arrow-buffer/src/buffer/scalar.rs
index 3c5334ca5118..f74b93ab8914 100644
--- a/arrow-buffer/src/buffer/scalar.rs
+++ b/arrow-buffer/src/buffer/scalar.rs
@@ -126,6 +126,14 @@ impl<T: ArrowNativeType> ScalarBuffer<T> {
         self.buffer
     }

+    /// Claim memory used by this buffer in the provided memory pool.
+    ///
+    /// See [`Buffer::claim`] for details.
+ #[cfg(feature = "pool")] + pub fn claim(&self, pool: &dyn crate::MemoryPool) { + self.buffer.claim(pool); + } + /// Returns true if this [`ScalarBuffer`] is equal to `other`, using pointer comparisons /// to determine buffer equality. This is cheaper than `PartialEq::eq` but may /// return false when the arrays are logically equal diff --git a/arrow-buffer/src/util/bit_mask.rs b/arrow-buffer/src/util/bit_mask.rs index a8ae1a765414..e3897e67542d 100644 --- a/arrow-buffer/src/util/bit_mask.rs +++ b/arrow-buffer/src/util/bit_mask.rs @@ -32,8 +32,18 @@ pub fn set_bits( offset_read: usize, len: usize, ) -> usize { - assert!(offset_write + len <= write_data.len() * 8); - assert!(offset_read + len <= data.len() * 8); + assert!( + offset_write + .checked_add(len) + .expect("operation will overflow write buffer") + <= write_data.len() * 8 + ); + assert!( + offset_read + .checked_add(len) + .expect("operation will overflow read buffer") + <= data.len() * 8 + ); let mut null_count = 0; let mut acc = 0; while len > acc { @@ -427,4 +437,40 @@ mod tests { assert_eq!(len_set, 1); assert_eq!(write_data, &[0b00000010]); } + + #[test] + #[should_panic(expected = "operation will overflow read buffer")] + fn test_overflow_read_buffer_bounds() { + // Tiny buffers so any huge computed index is out-of-bounds. + let data = [0u8; 1]; + let mut write_data = [0u8; 1]; + + // Choose values so (offset_read + len) wraps to a small number in release builds. + // offset_read = usize::MAX - 7, len = 8 => wraps to 0. + // This can bypass `assert!(offset_read + len <= data.len() * 8)`. + let offset_write: usize = 0; + let offset_read: usize = usize::MAX - 7; + let len: usize = 8; + + // should panic on bounds check overflow + let _nulls = set_bits(&mut write_data, &data, offset_write, offset_read, len); + } + + #[test] + #[should_panic(expected = "operation will overflow write buffer")] + fn test_overflow_write_buffer_bounds() { + // Tiny buffers so any huge computed index is out-of-bounds. + let data = [0u8; 1]; + let mut write_data = [0u8; 1]; + + // Choose values so (offset_write + len) wraps to a small number in release builds. + // offset_write = usize::MAX - 7, len = 8 => wraps to 0. + // This can bypass `assert!(offset_write + len <= write_data.len() * 8)`. 
+        let offset_write: usize = usize::MAX - 7;
+        let offset_read: usize = 0;
+        let len: usize = 8;
+
+        // should panic on bounds check overflow
+        let _nulls = set_bits(&mut write_data, &data, offset_write, offset_read, len);
+    }
 }
diff --git a/arrow-cast/Cargo.toml b/arrow-cast/Cargo.toml
index 536bc101a816..81649353d182 100644
--- a/arrow-cast/Cargo.toml
+++ b/arrow-cast/Cargo.toml
@@ -58,6 +58,7 @@ ryu = "1.0.16"
 [dev-dependencies]
 criterion = { workspace = true, default-features = false }
 half = { version = "2.1", default-features = false }
+insta = { workspace = true }
 rand = "0.9"

 [[bench]]
diff --git a/arrow-cast/src/base64.rs b/arrow-cast/src/base64.rs
index 5637bdc689d9..6a8da0141dea 100644
--- a/arrow-cast/src/base64.rs
+++ b/arrow-cast/src/base64.rs
@@ -106,7 +106,7 @@ mod tests {
         let data: BinaryArray = (0..len)
             .map(|_| {
                 let len = rng.random_range(0..16);
-                Some((0..len).map(|_| rng.random()).collect::<Vec<_>>())
+                Some((0..len).map(|_| rng.random::<u8>()).collect::<Vec<_>>())
             })
             .collect();
diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs
index 67efb5742485..9f1eba1057fd 100644
--- a/arrow-cast/src/cast/mod.rs
+++ b/arrow-cast/src/cast/mod.rs
@@ -2507,7 +2507,7 @@ fn adjust_timestamp_to_timezone<T: ArrowTimestampType>(
     let adjust = |o| {
         let local = as_datetime::<T>(o)?;
         let offset = to_tz.offset_from_local_datetime(&local).single()?;
-        T::make_value(local - offset.fix())
+        T::from_naive_datetime(local - offset.fix(), None)
     };
     let adjusted = if cast_options.safe {
         array.unary_opt::<_, Int64Type>(adjust)
diff --git a/arrow-cast/src/cast/string.rs b/arrow-cast/src/cast/string.rs
index 77696ae0d8cc..68fce85cb436 100644
--- a/arrow-cast/src/cast/string.rs
+++ b/arrow-cast/src/cast/string.rs
@@ -168,7 +168,7 @@ fn cast_string_to_timestamp_impl<
         let iter = iter.map(|v| {
             v.and_then(|v| {
                 let naive = string_to_datetime(tz, v).ok()?.naive_utc();
-                T::make_value(naive)
+                T::from_naive_datetime(naive, None)
             })
         });
         // Benefit:
@@ -182,7 +182,7 @@
             .map(|v| {
                 v.map(|v| {
                     let naive = string_to_datetime(tz, v)?.naive_utc();
-                    T::make_value(naive).ok_or_else(|| match T::UNIT {
+                    T::from_naive_datetime(naive, None).ok_or_else(|| match T::UNIT {
                         TimeUnit::Nanosecond => ArrowError::CastError(format!(
                             "Overflow converting {naive} to Nanosecond. The dates that can be represented as nanoseconds have to be between 1677-09-21T00:12:44.0 and 2262-04-11T23:47:16.854775804"
                         )),
diff --git a/arrow-cast/src/display.rs b/arrow-cast/src/display.rs
index 59dfa26c93cb..0460c0c96b55 100644
--- a/arrow-cast/src/display.rs
+++ b/arrow-cast/src/display.rs
@@ -80,6 +80,8 @@ pub struct FormatOptions<'a> {
     duration_format: DurationFormat,
     /// Show types in visual representation batches
     types_info: bool,
+    /// Whether string values should be quoted
+    quoted_strings: bool,
     /// Formatter factory used to instantiate custom [`ArrayFormatter`]s. This allows users to
     /// provide custom formatters.
     formatter_factory: Option<&'a dyn ArrayFormatterFactory>,
@@ -102,6 +104,7 @@ impl PartialEq for FormatOptions<'_> {
             && self.time_format == other.time_format
             && self.duration_format == other.duration_format
             && self.types_info == other.types_info
+            && self.quoted_strings == other.quoted_strings
             && match (self.formatter_factory, other.formatter_factory) {
                 (Some(f1), Some(f2)) => std::ptr::eq(f1, f2),
                 (None, None) => true,
@@ -123,6 +126,7 @@ impl Hash for FormatOptions<'_> {
         self.time_format.hash(state);
         self.duration_format.hash(state);
         self.types_info.hash(state);
+        self.quoted_strings.hash(state);
         self.formatter_factory
             .map(|f| f as *const dyn ArrayFormatterFactory)
             .hash(state);
@@ -142,6 +146,7 @@ impl<'a> FormatOptions<'a> {
             time_format: None,
             duration_format: DurationFormat::ISO8601,
             types_info: false,
+            quoted_strings: false,
             formatter_factory: None,
         }
     }
@@ -217,6 +222,17 @@ impl<'a> FormatOptions<'a> {
         Self { types_info, ..self }
     }

+    /// Sets whether string values should be quoted
+    ///
+    /// When `true`, strings are formatted using [`Debug`]-style with double quotes and escaping.
+    /// Defaults to `false`
+    pub const fn with_quoted_strings(self, quoted_strings: bool) -> Self {
+        Self {
+            quoted_strings,
+            ..self
+        }
+    }
+
     /// Overrides the [`ArrayFormatterFactory`] used to instantiate custom [`ArrayFormatter`]s.
     ///
     /// Using [`None`] causes pretty-printers to use the default [`ArrayFormatter`]s.
@@ -276,6 +292,11 @@ impl<'a> FormatOptions<'a> {
         self.types_info
     }

+    /// Returns whether string values should be quoted.
+    pub const fn quoted_strings(&self) -> bool {
+        self.quoted_strings
+    }
+
     /// Returns the [`ArrayFormatterFactory`] used to instantiate custom [`ArrayFormatter`]s.
     pub const fn formatter_factory(&self) -> Option<&'a dyn ArrayFormatterFactory> {
         self.formatter_factory
@@ -1081,16 +1102,38 @@ impl Display for MillisecondsFormatter<'_> {
     }
 }

-impl<O: OffsetSizeTrait> DisplayIndex for &GenericStringArray<O> {
-    fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
-        write!(f, "{}", self.value(idx))?;
+impl<'a, O: OffsetSizeTrait> DisplayIndexState<'a> for &'a GenericStringArray<O> {
+    type State = bool;
+
+    fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
+        Ok(options.quoted_strings())
+    }
+
+    fn write(&self, state: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
+        let value = self.value(idx);
+        if *state {
+            write!(f, "{:?}", value)?;
+        } else {
+            write!(f, "{}", value)?;
+        }
         Ok(())
     }
 }

-impl DisplayIndex for &StringViewArray {
-    fn write(&self, idx: usize, f: &mut dyn Write) -> FormatResult {
-        write!(f, "{}", self.value(idx))?;
+impl<'a> DisplayIndexState<'a> for &'a StringViewArray {
+    type State = bool;
+
+    fn prepare(&self, options: &FormatOptions<'a>) -> Result<Self::State, ArrowError> {
+        Ok(options.quoted_strings())
+    }
+
+    fn write(&self, state: &Self::State, idx: usize, f: &mut dyn Write) -> FormatResult {
+        let value = self.value(idx);
+        if *state {
+            write!(f, "{:?}", value)?;
+        } else {
+            write!(f, "{}", value)?;
+        }
         Ok(())
     }
 }
diff --git a/arrow-cast/src/pretty.rs b/arrow-cast/src/pretty.rs
index e7c199dbed97..61ce5598992d 100644
--- a/arrow-cast/src/pretty.rs
+++ b/arrow-cast/src/pretty.rs
@@ -318,20 +318,16 @@ mod tests {

         let table = pretty_format_batches(&[batch]).unwrap().to_string();

-        let expected = vec![
-            "+---+-----+",
-            "| a | b   |",
-            "+---+-----+",
-            "| a | 1   |",
-            "| b |     |",
-            "|   | 10  |",
-            "| d | 100 |",
-            "+---+-----+",
-        ];
-
-        let actual: Vec<&str> = table.lines().collect();
-
-        assert_eq!(expected, actual, "Actual result:\n{table}");
+        
insta::assert_snapshot!(table, @" + +---+-----+ + | a | b | + +---+-----+ + | a | 1 | + | b | | + | | 10 | + | d | 100 | + +---+-----+ + "); } #[test] @@ -348,14 +344,19 @@ mod tests { let table = pretty_format_columns("a", &columns).unwrap().to_string(); - let expected = vec![ - "+---+", "| a |", "+---+", "| a |", "| b |", "| |", "| d |", "| e |", "| |", - "| g |", "+---+", - ]; - - let actual: Vec<&str> = table.lines().collect(); - - assert_eq!(expected, actual, "Actual result:\n{table}"); + insta::assert_snapshot!(table, @" + +---+ + | a | + +---+ + | a | + | b | + | | + | d | + | e | + | | + | g | + +---+ + "); } #[test] @@ -378,20 +379,16 @@ mod tests { let table = pretty_format_batches(&[batch]).unwrap().to_string(); - let expected = vec![ - "+---+---+---+", - "| a | b | c |", - "+---+---+---+", - "| | | |", - "| | | |", - "| | | |", - "| | | |", - "+---+---+---+", - ]; - - let actual: Vec<&str> = table.lines().collect(); - - assert_eq!(expected, actual, "Actual result:\n{table:#?}"); + insta::assert_snapshot!(table, @" + +---+---+---+ + | a | b | c | + +---+---+---+ + | | | | + | | | | + | | | | + | | | | + +---+---+---+ + "); } #[test] @@ -411,19 +408,15 @@ mod tests { let table = pretty_format_batches(&[batch]).unwrap().to_string(); - let expected = vec![ - "+-------+", - "| d1 |", - "+-------+", - "| one |", - "| |", - "| three |", - "+-------+", - ]; - - let actual: Vec<&str> = table.lines().collect(); - - assert_eq!(expected, actual, "Actual result:\n{table}"); + insta::assert_snapshot!(table, @" + +-------+ + | d1 | + +-------+ + | one | + | | + | three | + +-------+ + "); } #[test] @@ -447,19 +440,16 @@ mod tests { let batch = RecordBatch::try_new(schema, vec![array]).unwrap(); let table = pretty_format_batches(&[batch]).unwrap().to_string(); - let expected = vec![ - "+-----------+", - "| d1 |", - "+-----------+", - "| [1, 2, 3] |", - "| |", - "| [7, 8, 9] |", - "+-----------+", - ]; - - let actual: Vec<&str> = table.lines().collect(); - assert_eq!(expected, actual, "Actual result:\n{table}"); + insta::assert_snapshot!(table, @" + +-----------+ + | d1 | + +-----------+ + | [1, 2, 3] | + | | + | [7, 8, 9] | + +-----------+ + "); } #[test] @@ -482,22 +472,19 @@ mod tests { let array: ArrayRef = Arc::new(builder.finish()); let batch = RecordBatch::try_new(schema, vec![array]).unwrap(); let table = pretty_format_batches(&[batch]).unwrap().to_string(); - let expected = vec![ - "+-----------------------+", - "| d1 |", - "+-----------------------+", - "| hello |", - "| |", - "| longer than 12 bytes |", - "| another than 12 bytes |", - "| |", - "| small |", - "+-----------------------+", - ]; - - let actual: Vec<&str> = table.lines().collect(); - assert_eq!(expected, actual, "Actual result:\n{table:#?}"); + insta::assert_snapshot!(table, @" + +-----------------------+ + | d1 | + +-----------------------+ + | hello | + | | + | longer than 12 bytes | + | another than 12 bytes | + | | + | small | + +-----------------------+ + "); } #[test] @@ -520,22 +507,19 @@ mod tests { let array: ArrayRef = Arc::new(builder.finish()); let batch = RecordBatch::try_new(schema, vec![array]).unwrap(); let table = pretty_format_batches(&[batch]).unwrap().to_string(); - let expected = vec![ - "+--------------------------------------------+", - "| d1 |", - "+--------------------------------------------+", - "| 68656c6c6f |", - "| |", - "| 6c6f6e676572207468616e203132206279746573 |", - "| 616e6f74686572207468616e203132206279746573 |", - "| |", - "| 736d616c6c |", - 
"+--------------------------------------------+", - ]; - - let actual: Vec<&str> = table.lines().collect(); - assert_eq!(expected, actual, "Actual result:\n\n{table:#?}"); + insta::assert_snapshot!(table, @" + +--------------------------------------------+ + | d1 | + +--------------------------------------------+ + | 68656c6c6f | + | | + | 6c6f6e676572207468616e203132206279746573 | + | 616e6f74686572207468616e203132206279746573 | + | | + | 736d616c6c | + +--------------------------------------------+ + "); } #[test] @@ -554,47 +538,34 @@ mod tests { let batch = RecordBatch::try_new(schema, vec![array]).unwrap(); let table = pretty_format_batches(&[batch]).unwrap().to_string(); - let expected = vec![ - "+--------+", - "| d1 |", - "+--------+", - "| 010203 |", - "| |", - "| 070809 |", - "+--------+", - ]; - let actual: Vec<&str> = table.lines().collect(); - - assert_eq!(expected, actual, "Actual result:\n{table}"); + insta::assert_snapshot!(table, @" + +--------+ + | d1 | + +--------+ + | 010203 | + | | + | 070809 | + +--------+ + "); } - /// Generate an array with type $ARRAYTYPE with a numeric value of - /// $VALUE, and compare $EXPECTED_RESULT to the output of - /// formatting that array with `pretty_format_batches` - macro_rules! check_datetime { - ($ARRAYTYPE:ident, $VALUE:expr, $EXPECTED_RESULT:expr) => { - let mut builder = $ARRAYTYPE::builder(10); - builder.append_value($VALUE); - builder.append_null(); - let array = builder.finish(); - - let schema = Arc::new(Schema::new(vec![Field::new( - "f", - array.data_type().clone(), - true, - )])); - let batch = RecordBatch::try_new(schema, vec![Arc::new(array)]).unwrap(); - - let table = pretty_format_batches(&[batch]) - .expect("formatting batches") - .to_string(); - - let expected = $EXPECTED_RESULT; - let actual: Vec<&str> = table.lines().collect(); - - assert_eq!(expected, actual, "Actual result:\n\n{actual:#?}\n\n"); - }; + /// Generate an array of [`ArrowPrimitiveType`] with a numeric `value`, + /// then format it with `pretty_format_batches`. 
+    fn format_primitive_batch<T: ArrowPrimitiveType>(value: T::Native) -> String {
+        let mut builder = PrimitiveBuilder::<T>::with_capacity(10);
+        builder.append_value(value);
+        builder.append_null();
+        let array = builder.finish();
+        let schema = Arc::new(Schema::new(vec![Field::new(
+            "f",
+            array.data_type().clone(),
+            true,
+        )]));
+        let batch = RecordBatch::try_new(schema, vec![Arc::new(array)]).unwrap();
+        pretty_format_batches(&[batch])
+            .expect("formatting batches")
+            .to_string()
+    }

     fn timestamp_batch<T: ArrowTimestampType>(timezone: &str, value: T::Native) -> RecordBatch {
@@ -617,158 +588,151 @@ mod tests {
         let batch = timestamp_batch::<TimestampSecondType>("+08:00", 11111111);
         let table = pretty_format_batches(&[batch]).unwrap().to_string();

-        let expected = vec![
-            "+---------------------------+",
-            "| f                         |",
-            "+---------------------------+",
-            "| 1970-05-09T22:25:11+08:00 |",
-            "|                           |",
-            "+---------------------------+",
-        ];
-        let actual: Vec<&str> = table.lines().collect();
-        assert_eq!(expected, actual, "Actual result:\n\n{actual:#?}\n\n");
+        insta::assert_snapshot!(table, @"
+        +---------------------------+
+        | f                         |
+        +---------------------------+
+        | 1970-05-09T22:25:11+08:00 |
+        |                           |
+        +---------------------------+
+        ");
     }

     #[test]
     fn test_pretty_format_timestamp_second() {
-        let expected = vec![
-            "+---------------------+",
-            "| f                   |",
-            "+---------------------+",
-            "| 1970-05-09T14:25:11 |",
-            "|                     |",
-            "+---------------------+",
-        ];
-        check_datetime!(TimestampSecondArray, 11111111, expected);
+        let table = format_primitive_batch::<TimestampSecondType>(11111111);
+        insta::assert_snapshot!(table, @"
+        +---------------------+
+        | f                   |
+        +---------------------+
+        | 1970-05-09T14:25:11 |
+        |                     |
+        +---------------------+
+        ");
     }

     #[test]
     fn test_pretty_format_timestamp_millisecond() {
-        let expected = vec![
-            "+-------------------------+",
-            "| f                       |",
-            "+-------------------------+",
-            "| 1970-01-01T03:05:11.111 |",
-            "|                         |",
-            "+-------------------------+",
-        ];
-        check_datetime!(TimestampMillisecondArray, 11111111, expected);
+        let table = format_primitive_batch::<TimestampMillisecondType>(11111111);
+        insta::assert_snapshot!(table, @"
+        +-------------------------+
+        | f                       |
+        +-------------------------+
+        | 1970-01-01T03:05:11.111 |
+        |                         |
+        +-------------------------+
+        ");
     }

     #[test]
     fn test_pretty_format_timestamp_microsecond() {
-        let expected = vec![
-            "+----------------------------+",
-            "| f                          |",
-            "+----------------------------+",
-            "| 1970-01-01T00:00:11.111111 |",
-            "|                            |",
-            "+----------------------------+",
-        ];
-        check_datetime!(TimestampMicrosecondArray, 11111111, expected);
+        let table = format_primitive_batch::<TimestampMicrosecondType>(11111111);
+        insta::assert_snapshot!(table, @"
+        +----------------------------+
+        | f                          |
+        +----------------------------+
+        | 1970-01-01T00:00:11.111111 |
+        |                            |
+        +----------------------------+
+        ");
     }

     #[test]
     fn test_pretty_format_timestamp_nanosecond() {
-        let expected = vec![
-            "+-------------------------------+",
-            "| f                             |",
-            "+-------------------------------+",
-            "| 1970-01-01T00:00:00.011111111 |",
-            "|                               |",
-            "+-------------------------------+",
-        ];
-        check_datetime!(TimestampNanosecondArray, 11111111, expected);
+        let table = format_primitive_batch::<TimestampNanosecondType>(11111111);
+        insta::assert_snapshot!(table, @"
+        +-------------------------------+
+        | f                             |
+        +-------------------------------+
+        | 1970-01-01T00:00:00.011111111 |
+        |                               |
+        +-------------------------------+
+        ");
     }

     #[test]
     fn test_pretty_format_date_32() {
-        let expected = vec![
-            "+------------+",
-            "| f          |",
-            "+------------+",
-            "| 1973-05-19 |",
-            "|            |",
-            "+------------+",
-        ];
-        check_datetime!(Date32Array, 1234, expected);
+        let table = format_primitive_batch::<Date32Type>(1234);
+        insta::assert_snapshot!(table, @"
+        +------------+
+        | f          |
+        +------------+
+        | 1973-05-19 |
+        |            |
+        +------------+
+        ");
     }

     #[test]
     fn test_pretty_format_date_64() {
-        let expected = vec![
-            "+---------------------+",
-            "| f                   |",
-            "+---------------------+",
-            "| 2005-03-18T01:58:20 |",
-            "|                     |",
-            "+---------------------+",
-        ];
-        check_datetime!(Date64Array, 1111111100000, expected);
+        let table = format_primitive_batch::<Date64Type>(1111111100000);
+        insta::assert_snapshot!(table, @"
+        +---------------------+
+        | f                   |
+        +---------------------+
+        | 2005-03-18T01:58:20 |
+        |                     |
+        +---------------------+
+        ");
     }

     #[test]
     fn test_pretty_format_time_32_second() {
-        let expected = vec![
-            "+----------+",
-            "| f        |",
-            "+----------+",
-            "| 00:18:31 |",
-            "|          |",
-            "+----------+",
-        ];
-        check_datetime!(Time32SecondArray, 1111, expected);
+        let table = format_primitive_batch::<Time32SecondType>(1111);
+        insta::assert_snapshot!(table, @"
+        +----------+
+        | f        |
+        +----------+
+        | 00:18:31 |
+        |          |
+        +----------+
+        ");
     }

     #[test]
     fn test_pretty_format_time_32_millisecond() {
-        let expected = vec![
-            "+--------------+",
-            "| f            |",
-            "+--------------+",
-            "| 03:05:11.111 |",
-            "|              |",
-            "+--------------+",
-        ];
-        check_datetime!(Time32MillisecondArray, 11111111, expected);
+        let table = format_primitive_batch::<Time32MillisecondType>(11111111);
+        insta::assert_snapshot!(table, @"
+        +--------------+
+        | f            |
+        +--------------+
+        | 03:05:11.111 |
+        |              |
+        +--------------+
+        ");
     }

     #[test]
     fn test_pretty_format_time_64_microsecond() {
-        let expected = vec![
-            "+-----------------+",
-            "| f               |",
-            "+-----------------+",
-            "| 00:00:11.111111 |",
-            "|                 |",
-            "+-----------------+",
-        ];
-        check_datetime!(Time64MicrosecondArray, 11111111, expected);
+        let table = format_primitive_batch::<Time64MicrosecondType>(11111111);
+        insta::assert_snapshot!(table, @"
+        +-----------------+
+        | f               |
+        +-----------------+
+        | 00:00:11.111111 |
+        |                 |
+        +-----------------+
+        ");
     }

     #[test]
     fn test_pretty_format_time_64_nanosecond() {
-        let expected = vec![
-            "+--------------------+",
-            "| f                  |",
-            "+--------------------+",
-            "| 00:00:00.011111111 |",
-            "|                    |",
-            "+--------------------+",
-        ];
-        check_datetime!(Time64NanosecondArray, 11111111, expected);
+        let table = format_primitive_batch::<Time64NanosecondType>(11111111);
+        insta::assert_snapshot!(table, @"
+        +--------------------+
+        | f                  |
+        +--------------------+
+        | 00:00:00.011111111 |
+        |                    |
+        +--------------------+
+        ");
     }

     #[test]
     fn test_int_display() {
         let array = Arc::new(Int32Array::from(vec![6, 3])) as ArrayRef;
-        let actual_one = array_value_to_string(&array, 0).unwrap();
-        let expected_one = "6";
-
-        let actual_two = array_value_to_string(&array, 1).unwrap();
-        let expected_two = "3";
-        assert_eq!(actual_one, expected_one);
-        assert_eq!(actual_two, expected_two);
+        insta::assert_snapshot!(array_value_to_string(&array, 0).unwrap(), @"6");
+        insta::assert_snapshot!(array_value_to_string(&array, 1).unwrap(), @"3");
     }

     #[test]
@@ -794,19 +758,16 @@ mod tests {

         let table = pretty_format_batches(&[batch]).unwrap().to_string();

-        let expected = vec![
-            "+-------+",
-            "| f     |",
-            "+-------+",
-            "| 1.01  |",
-            "|       |",
-            "| 2.00  |",
-            "| 30.40 |",
-            "+-------+",
-        ];
-
-        let actual: Vec<&str> = table.lines().collect();
-        assert_eq!(expected, actual, "Actual result:\n{table}");
+        insta::assert_snapshot!(table, @"
+        +-------+
+        | f     |
+        +-------+
+        | 1.01  |
+        |       |
+        | 2.00  |
+        | 30.40 |
+        +-------+
+        ");
     }

     #[test]
@@ -831,13 +792,17 @@ mod tests {
         let batch = RecordBatch::try_new(schema, vec![dm]).unwrap();

         let table =
pretty_format_batches(&[batch]).unwrap().to_string(); - let expected = vec![ - "+------+", "| f |", "+------+", "| 101 |", "| |", "| 200 |", "| 3040 |", - "+------+", - ]; - let actual: Vec<&str> = table.lines().collect(); - assert_eq!(expected, actual, "Actual result:\n{table}"); + insta::assert_snapshot!(table, @" + +------+ + | f | + +------+ + | 101 | + | | + | 200 | + | 3040 | + +------+ + "); } #[test] @@ -881,18 +846,16 @@ mod tests { RecordBatch::try_new(Arc::new(schema), vec![Arc::new(c1), Arc::new(c2)]).unwrap(); let table = pretty_format_batches(&[batch]).unwrap().to_string(); - let expected = vec![ - "+--------------------------+----+", - "| c1 | c2 |", - "+--------------------------+----+", - "| {c11: 1, c12: {c121: e}} | a |", - "| {c11: , c12: {c121: f}} | b |", - "| {c11: 5, c12: {c121: g}} | c |", - "+--------------------------+----+", - ]; - let actual: Vec<&str> = table.lines().collect(); - assert_eq!(expected, actual, "Actual result:\n{table}"); + insta::assert_snapshot!(table, @" + +--------------------------+----+ + | c1 | c2 | + +--------------------------+----+ + | {c11: 1, c12: {c121: e}} | a | + | {c11: , c12: {c121: f}} | b | + | {c11: 5, c12: {c121: g}} | c | + +--------------------------+----+ + "); } #[test] @@ -916,19 +879,17 @@ mod tests { let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(union)]).unwrap(); let table = pretty_format_batches(&[batch]).unwrap().to_string(); - let actual: Vec<&str> = table.lines().collect(); - let expected = vec![ - "+------------+", - "| Teamsters |", - "+------------+", - "| {a=1} |", - "| {b=3.2234} |", - "| {b=} |", - "| {a=} |", - "+------------+", - ]; - assert_eq!(expected, actual); + insta::assert_snapshot!(table, @" + +------------+ + | Teamsters | + +------------+ + | {a=1} | + | {b=3.2234} | + | {b=} | + | {a=} | + +------------+ + "); } #[test] @@ -952,19 +913,17 @@ mod tests { let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(union)]).unwrap(); let table = pretty_format_batches(&[batch]).unwrap().to_string(); - let actual: Vec<&str> = table.lines().collect(); - let expected = vec![ - "+------------+", - "| Teamsters |", - "+------------+", - "| {a=1} |", - "| {b=3.2234} |", - "| {b=} |", - "| {a=} |", - "+------------+", - ]; - assert_eq!(expected, actual); + insta::assert_snapshot!(table, @" + +------------+ + | Teamsters | + +------------+ + | {a=1} | + | {b=3.2234} | + | {b=} | + | {a=} | + +------------+ + "); } #[test] @@ -1012,19 +971,18 @@ mod tests { let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(outer)]).unwrap(); let table = pretty_format_batches(&[batch]).unwrap().to_string(); - let actual: Vec<&str> = table.lines().collect(); - let expected = vec![ - "+-----------------------------+", - "| Teamsters |", - "+-----------------------------+", - "| {European Union={b=1}} |", - "| {European Union={c=3.2234}} |", - "| {a=} |", - "| {a=1234} |", - "| {European Union={c=}} |", - "+-----------------------------+", - ]; - assert_eq!(expected, actual); + + insta::assert_snapshot!(table, @" + +-----------------------------+ + | Teamsters | + +-----------------------------+ + | {European Union={b=1}} | + | {European Union={c=3.2234}} | + | {a=} | + | {a=1234} | + | {European Union={c=}} | + +-----------------------------+ + "); } #[test] @@ -1055,21 +1013,18 @@ mod tests { ) .unwrap(); - let mut buf = String::new(); - write!(&mut buf, "{}", pretty_format_batches(&[batch]).unwrap()).unwrap(); - - let s = [ - "+---+-----+", - "| a | b |", - "+---+-----+", - "| a | 1 |", 
- "| b | |", - "| | 10 |", - "| d | 100 |", - "+---+-----+", - ]; - let expected = s.join("\n"); - assert_eq!(expected, buf); + let table = pretty_format_batches(&[batch]).unwrap().to_string(); + + insta::assert_snapshot!(table, @" + +---+-----+ + | a | b | + +---+-----+ + | a | 1 | + | b | | + | | 10 | + | d | 100 | + +---+-----+ + "); } #[test] @@ -1091,12 +1046,15 @@ mod tests { let table = pretty_format_batches(&[batch]).unwrap().to_string(); - let expected = vec![ - "+------+", "| f16 |", "+------+", "| NaN |", "| 4 |", "| -inf |", "+------+", - ]; - - let actual: Vec<&str> = table.lines().collect(); - assert_eq!(expected, actual, "Actual result:\n{table}"); + insta::assert_snapshot!(table, @" + +------+ + | f16 | + +------+ + | NaN | + | 4 | + | -inf | + +------+ + "); } #[test] @@ -1121,23 +1079,19 @@ mod tests { let table = pretty_format_batches(&[batch]).unwrap().to_string(); - let expected = vec![ - "+------------------+", - "| IntervalDayTime |", - "+------------------+", - "| -1 days -10 mins |", - "| -1.001 secs |", - "| -0.001 secs |", - "| 0.001 secs |", - "| 0.010 secs |", - "| 0.100 secs |", - "| 0 secs |", - "+------------------+", - ]; - - let actual: Vec<&str> = table.lines().collect(); - - assert_eq!(expected, actual, "Actual result:\n{table}"); + insta::assert_snapshot!(table, @" + +------------------+ + | IntervalDayTime | + +------------------+ + | -1 days -10 mins | + | -1.001 secs | + | -0.001 secs | + | 0.001 secs | + | 0.010 secs | + | 0.100 secs | + | 0 secs | + +------------------+ + "); } #[test] @@ -1169,30 +1123,26 @@ mod tests { let table = pretty_format_batches(&[batch]).unwrap().to_string(); - let expected = vec![ - "+--------------------------+", - "| IntervalMonthDayNano |", - "+--------------------------+", - "| -1 mons -1 days -10 mins |", - "| -1.000000001 secs |", - "| -0.000000001 secs |", - "| 0.000000001 secs |", - "| 0.000000010 secs |", - "| 0.000000100 secs |", - "| 0.000001000 secs |", - "| 0.000010000 secs |", - "| 0.000100000 secs |", - "| 0.001000000 secs |", - "| 0.010000000 secs |", - "| 0.100000000 secs |", - "| 1.000000000 secs |", - "| 0 secs |", - "+--------------------------+", - ]; - - let actual: Vec<&str> = table.lines().collect(); - - assert_eq!(expected, actual, "Actual result:\n{table}"); + insta::assert_snapshot!(table, @" + +--------------------------+ + | IntervalMonthDayNano | + +--------------------------+ + | -1 mons -1 days -10 mins | + | -1.000000001 secs | + | -0.000000001 secs | + | 0.000000001 secs | + | 0.000000010 secs | + | 0.000000100 secs | + | 0.000001000 secs | + | 0.000010000 secs | + | 0.000100000 secs | + | 0.001000000 secs | + | 0.010000000 secs | + | 0.100000000 secs | + | 1.000000000 secs | + | 0 secs | + +--------------------------+ + "); } #[test] @@ -1218,40 +1168,34 @@ mod tests { .unwrap() .to_string(); - let expected_column = vec![ - "+----------------+", - "| my_column_name |", - "+----------------+", - "| 1 |", - "| 2 |", - "| null |", - "| 3 |", - "| 4 |", - "+----------------+", - ]; - - let actual: Vec<&str> = column.lines().collect(); - assert_eq!(expected_column, actual, "Actual result:\n{column}"); - - let batch = pretty_format_batches_with_options(&[batch], &options) + insta::assert_snapshot!(column, @" + +----------------+ + | my_column_name | + +----------------+ + | 1 | + | 2 | + | null | + | 3 | + | 4 | + +----------------+ + "); + + let table = pretty_format_batches_with_options(&[batch], &options) .unwrap() .to_string(); - let expected_table = vec![ - 
"+---------------+----------------+", - "| my_int32_name | my_string_name |", - "| Int32 | Utf8 |", - "+---------------+----------------+", - "| 1 | foo |", - "| 2 | bar |", - "| null | null |", - "| 3 | baz |", - "| 4 | null |", - "+---------------+----------------+", - ]; - - let actual: Vec<&str> = batch.lines().collect(); - assert_eq!(expected_table, actual, "Actual result:\n{batch}"); + insta::assert_snapshot!(table, @" + +---------------+----------------+ + | my_int32_name | my_string_name | + | Int32 | Utf8 | + +---------------+----------------+ + | 1 | foo | + | 2 | bar | + | null | null | + | 3 | baz | + | 4 | null | + +---------------+----------------+ + "); } #[test] @@ -1268,20 +1212,16 @@ mod tests { .unwrap() .to_string(); - // Expected output - let expected_pretty = vec![ - "+------------------------------+", - "| pretty |", - "+------------------------------+", - "| |", - "| |", - "| 0 days 1 hours 1 mins 1 secs |", - "| null |", - "+------------------------------+", - ]; - - let actual: Vec<&str> = pretty.lines().collect(); - assert_eq!(expected_pretty, actual, "Actual result:\n{pretty}"); + insta::assert_snapshot!(pretty, @" + +------------------------------+ + | pretty | + +------------------------------+ + | | + | | + | 0 days 1 hours 1 mins 1 secs | + | null | + +------------------------------+ + "); // ISO8601 formatting let opts_iso = FormatOptions::default() @@ -1291,20 +1231,16 @@ mod tests { .unwrap() .to_string(); - // Expected output - let expected_iso = vec![ - "+-----------+", - "| iso |", - "+-----------+", - "| |", - "| |", - "| PT3661S |", - "| null |", - "+-----------+", - ]; - - let actual: Vec<&str> = iso.lines().collect(); - assert_eq!(expected_iso, actual, "Actual result:\n{iso}"); + insta::assert_snapshot!(iso, @" + +-----------+ + | iso | + +-----------+ + | | + | | + | PT3661S | + | null | + +-----------+ + "); } // @@ -1408,26 +1344,20 @@ mod tests { ) .unwrap(); - let mut buf = String::new(); - write!( - &mut buf, - "{}", - pretty_format_batches_with_options(&[batch], &options).unwrap() - ) - .unwrap(); + let table = pretty_format_batches_with_options(&[batch], &options) + .unwrap() + .to_string(); - let s = [ - "+--------+", - "| income |", - "+--------+", - "| 1 € |", - "| |", - "| 10 € |", - "| 100 € |", - "+--------+", - ]; - let expected = s.join("\n"); - assert_eq!(expected, buf); + insta::assert_snapshot!(table, @" + +--------+ + | income | + +--------+ + | 1 € | + | | + | 10 € | + | 100 € | + +--------+ + "); } #[test] @@ -1466,24 +1396,18 @@ mod tests { // define data. let batch = RecordBatch::try_new(schema, vec![Arc::new(outer_list)]).unwrap(); - let mut buf = String::new(); - write!( - &mut buf, - "{}", - pretty_format_batches_with_options(&[batch], &options).unwrap() - ) - .unwrap(); + let table = pretty_format_batches_with_options(&[batch], &options) + .unwrap() + .to_string(); - let s = [ - "+----------------------------------+", - "| income |", - "+----------------------------------+", - "| [[1 €], ] |", - "| [[2 €, 8 €], [50 €, 25 €, 25 €]] |", - "+----------------------------------+", - ]; - let expected = s.join("\n"); - assert_eq!(expected, buf); + insta::assert_snapshot!(table, @" + +----------------------------------+ + | income | + +----------------------------------+ + | [[1 €], ] | + | [[2 €, 8 €], [50 €, 25 €, 25 €]] | + +----------------------------------+ + "); } #[test] @@ -1530,25 +1454,19 @@ mod tests { // define data. 
let batch = RecordBatch::try_new(schema, vec![Arc::new(nested_data.finish())]).unwrap(); - let mut buf = String::new(); - write!( - &mut buf, - "{}", - pretty_format_batches_with_options(&[batch], &options).unwrap() - ) - .unwrap(); + let table = pretty_format_batches_with_options(&[batch], &options) + .unwrap() + .to_string(); - let s = [ - "+---------------------------------+", - "| income |", - "+---------------------------------+", - "| {name: Gimli, income: 10 €} |", - "| {name: Legolas, income: } |", - "| {name: Aragorn, income: 30 €} |", - "+---------------------------------+", - ]; - let expected = s.join("\n"); - assert_eq!(expected, buf); + insta::assert_snapshot!(table, @" + +---------------------------------+ + | income | + +---------------------------------+ + | {name: Gimli, income: 10 €} | + | {name: Legolas, income: } | + | {name: Aragorn, income: 30 €} | + +---------------------------------+ + "); } #[test] @@ -1585,23 +1503,17 @@ mod tests { )])); let batch = RecordBatch::try_new(schema, vec![Arc::new(array)]).unwrap(); - let mut buf = String::new(); - write!( - &mut buf, - "{}", - pretty_format_batches_with_options(&[batch], &options).unwrap() - ) - .unwrap(); + let table = pretty_format_batches_with_options(&[batch], &options) + .unwrap() + .to_string(); - let s = [ - "+-----------------------------------------------+", - "| income |", - "+-----------------------------------------------+", - "| {Gimli: 10 €, Legolas: , Aragorn: 30 €} |", - "+-----------------------------------------------+", - ]; - let expected = s.join("\n"); - assert_eq!(expected, buf); + insta::assert_snapshot!(table, @" + +-----------------------------------------------+ + | income | + +-----------------------------------------------+ + | {Gimli: 10 €, Legolas: , Aragorn: 30 €} | + +-----------------------------------------------+ + "); } #[test] @@ -1635,23 +1547,17 @@ mod tests { // define data. 
let batch = RecordBatch::try_new(schema, vec![Arc::new(array)]).unwrap(); - let mut buf = String::new(); - write!( - &mut buf, - "{}", - pretty_format_batches_with_options(&[batch], &options).unwrap() - ) - .unwrap(); + let table = pretty_format_batches_with_options(&[batch], &options) + .unwrap() + .to_string(); - let s = [ - "+--------------+", - "| income |", - "+--------------+", - "| {income=1 €} |", - "+--------------+", - ]; - let expected = s.join("\n"); - assert_eq!(expected, buf); + insta::assert_snapshot!(table, @" + +--------------+ + | income | + +--------------+ + | {income=1 €} | + +--------------+ + "); } #[test] @@ -1678,37 +1584,30 @@ mod tests { ) .unwrap(); - let mut buf = String::new(); - write!( - &mut buf, - "{}", - create_table( - // No metadata compared to test_format_batches_with_custom_formatters - Some(Arc::new(Schema::new(vec![Field::new( - "income", - DataType::Int32, - true - ),]))), - &[batch], - &options, - ) - .unwrap() + let table = create_table( + // No metadata compared to test_format_batches_with_custom_formatters + Some(Arc::new(Schema::new(vec![Field::new( + "income", + DataType::Int32, + true, + )]))), + &[batch], + &options, ) - .unwrap(); + .unwrap() + .to_string(); // No € formatting as in test_format_batches_with_custom_formatters - let s = [ - "+--------------+", - "| income |", - "+--------------+", - "| 1 (32-Bit) |", - "| |", - "| 10 (32-Bit) |", - "| 100 (32-Bit) |", - "+--------------+", - ]; - let expected = s.join("\n"); - assert_eq!(expected, buf); + insta::assert_snapshot!(table, @" + +--------------+ + | income | + +--------------+ + | 1 (32-Bit) | + | | + | 10 (32-Bit) | + | 100 (32-Bit) | + +--------------+ + "); } #[test] @@ -1721,31 +1620,24 @@ mod tests { Some(100), ])); - let mut buf = String::new(); - write!( - &mut buf, - "{}", - pretty_format_columns_with_options( - "income", - &[array], - &FormatOptions::default().with_formatter_factory(Some(&TestFormatters {})) - ) - .unwrap() + let table = pretty_format_columns_with_options( + "income", + &[array], + &FormatOptions::default().with_formatter_factory(Some(&TestFormatters {})), ) - .unwrap(); + .unwrap() + .to_string(); - let s = [ - "+--------------+", - "| income |", - "+--------------+", - "| 1 (32-Bit) |", - "| |", - "| 10 (32-Bit) |", - "| 100 (32-Bit) |", - "+--------------+", - ]; - let expected = s.join("\n"); - assert_eq!(expected, buf); + insta::assert_snapshot!(table, @" + +--------------+ + | income | + +--------------+ + | 1 (32-Bit) | + | | + | 10 (32-Bit) | + | 100 (32-Bit) | + +--------------+ + "); } #[test] @@ -1771,9 +1663,160 @@ mod tests { let error = pretty_format_batches_with_schema(schema_a, &[batch]) .err() .unwrap(); - assert_eq!( - &error.to_string(), - "Invalid argument error: Expected the same number of columns in a record batch (1) as the number of fields (2) in the schema" - ); + insta::assert_snapshot!(error, @"Invalid argument error: Expected the same number of columns in a record batch (1) as the number of fields (2) in the schema"); + } + + #[test] + fn test_quoted_strings() { + let schema = Arc::new(Schema::new(vec![Field::new( + "strings", + DataType::Utf8, + true, + )])); + + let string_array = StringArray::from(vec![ + Some("hello"), + Some("world"), + Some(""), + Some("tab\there"), + Some("newline\ntest"), + Some("quote\"test"), + Some("backslash\\test"), + None, + ]); + + let batch = RecordBatch::try_new(schema.clone(), vec![Arc::new(string_array)]).unwrap(); + + let options_none = FormatOptions::new().with_null("NULL"); + let table = 
pretty_format_batches_with_options(std::slice::from_ref(&batch), &options_none) + .unwrap() + .to_string(); + + insta::assert_snapshot!(table, @" + +----------------+ + | strings | + +----------------+ + | hello | + | world | + | | + | tab here | + | newline | + | test | + | quote\"test | + | backslash\\test | + | NULL | + +----------------+ + "); + + let options_quoted = FormatOptions::new() + .with_null("NULL") + .with_quoted_strings(true); + + let table = pretty_format_batches_with_options(&[batch], &options_quoted) + .unwrap() + .to_string(); + + insta::assert_snapshot!(table, @r#" + +-------------------+ + | strings | + +-------------------+ + | "hello" | + | "world" | + | "" | + | "tab\there" | + | "newline\ntest" | + | "quote\"test" | + | "backslash\\test" | + | NULL | + +-------------------+ + "#); + } + + #[test] + fn test_string_view_quoted() { + let schema = Arc::new(Schema::new(vec![Field::new( + "view_strings", + DataType::Utf8View, + true, + )])); + + let mut builder = StringViewBuilder::new(); + builder.append_value("hello"); + builder.append_null(); + builder.append_value("quote\"test"); + + let array: ArrayRef = Arc::new(builder.finish()); + let batch = RecordBatch::try_new(schema, vec![array]).unwrap(); + + let options = FormatOptions::new().with_quoted_strings(true); + + let table = pretty_format_batches_with_options(&[batch], &options) + .unwrap() + .to_string(); + + insta::assert_snapshot!(table, @" + +---------------+ + | view_strings | + +---------------+ + | \"hello\" | + | | + | \"quote\\\"test\" | + +---------------+ + "); + } + + #[test] + fn test_quoted_strings_in_struct() { + let string_builder = StringBuilder::new(); + let mut name_builder = string_builder; + name_builder.append_value("Alice"); + name_builder.append_value(""); + name_builder.append_value("Bob"); + + let fields = vec![Field::new("name", DataType::Utf8, false)]; + let mut struct_builder = StructBuilder::new(fields, vec![Box::new(name_builder)]); + struct_builder.append(true); + struct_builder.append(true); + struct_builder.append(true); + + let struct_array = struct_builder.finish(); + + let schema = Arc::new(Schema::new(vec![Field::new( + "person", + struct_array.data_type().clone(), + false, + )])); + + let batch = RecordBatch::try_new(schema, vec![Arc::new(struct_array)]).unwrap(); + + let options_none = FormatOptions::new(); + let table = pretty_format_batches_with_options(std::slice::from_ref(&batch), &options_none) + .unwrap() + .to_string(); + + insta::assert_snapshot!(table, @" + +---------------+ + | person | + +---------------+ + | {name: Alice} | + | {name: } | + | {name: Bob} | + +---------------+ + "); + + let options_quoted = FormatOptions::new().with_quoted_strings(true); + let table = pretty_format_batches_with_options(&[batch], &options_quoted) + .unwrap() + .to_string(); + + insta::assert_snapshot!(table, @" + +-----------------+ + | person | + +-----------------+ + | {name: \"Alice\"} | + | {name: \"\"} | + | {name: \"Bob\"} | + +-----------------+ + "); } } diff --git a/arrow-data/Cargo.toml b/arrow-data/Cargo.toml index 9c7a5206b2f4..9f1b50ed14d9 100644 --- a/arrow-data/Cargo.toml +++ b/arrow-data/Cargo.toml @@ -39,6 +39,8 @@ bench = false force_validate = [] # Enable ffi support ffi = ["arrow-schema/ffi"] +# Enable memory tracking support +pool = ["arrow-buffer/pool"] [package.metadata.docs.rs] all-features = true diff --git a/arrow-data/src/data.rs b/arrow-data/src/data.rs index 21cf4e5b5e2c..a5a64dfe9f38 100644 --- a/arrow-data/src/data.rs +++ b/arrow-data/src/data.rs @@ 
-1659,6 +1659,30 @@ impl ArrayData {
     pub fn into_builder(self) -> ArrayDataBuilder {
         self.into()
     }
+
+    /// Claim memory used by this ArrayData in the provided memory pool.
+    ///
+    /// This claims memory for:
+    /// - All buffers in self.buffers
+    /// - All child ArrayData recursively
+    /// - The null buffer if present
+    #[cfg(feature = "pool")]
+    pub fn claim(&self, pool: &dyn arrow_buffer::MemoryPool) {
+        // Claim all data buffers
+        for buffer in &self.buffers {
+            buffer.claim(pool);
+        }
+
+        // Claim null buffer if present
+        if let Some(nulls) = &self.nulls {
+            nulls.claim(pool);
+        }
+
+        // Recursively claim child data
+        for child in &self.child_data {
+            child.claim(pool);
+        }
+    }
 }
 
 /// Return the expected [`DataTypeLayout`] Arrays of this data
diff --git a/arrow-data/src/transform/list_view.rs b/arrow-data/src/transform/list_view.rs
index 9b66a6a6abb1..f01e14b978c9 100644
--- a/arrow-data/src/transform/list_view.rs
+++ b/arrow-data/src/transform/list_view.rs
@@ -27,21 +27,20 @@ pub(super) fn build_extend(
     let offsets = array.buffer::<T>(0);
     let sizes = array.buffer::<T>(1);
     Box::new(
-        move |mutable: &mut _MutableArrayData, _index: usize, start: usize, len: usize| {
-            let offset_buffer = &mut mutable.buffer1;
-            let sizes_buffer = &mut mutable.buffer2;
+        move |mutable: &mut _MutableArrayData, index: usize, start: usize, len: usize| {
+            let mut new_offset = T::usize_as(mutable.child_data[0].len());
 
-            for &offset in &offsets[start..start + len] {
-                offset_buffer.push(offset);
-            }
+            for i in start..start + len {
+                mutable.buffer1.push(new_offset);
+                mutable.buffer2.push(sizes[i]);
+                new_offset = new_offset.checked_add(&sizes[i]).expect("offset overflow");
 
-            // sizes
-            for &size in &sizes[start..start + len] {
-                sizes_buffer.push(size);
+                let size = sizes[i].as_usize();
+                if size > 0 {
+                    let child_start = offsets[i].as_usize();
+                    mutable.child_data[0].extend(index, child_start, child_start + size);
+                }
             }
-
-            // the beauty of views is that we don't need to copy child_data, we just splat
-            // the offsets and sizes.
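+            // e.g. copying two lists with sizes [2, 3] stored at old child
+            // offsets [7, 1] appends child ranges 7..9 and 1..4 to
+            // child_data, and records new offsets [base, base + 2] with the
+            // same sizes, so each copied list's child values stay contiguous.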
         },
     )
 }
diff --git a/arrow-data/src/transform/mod.rs b/arrow-data/src/transform/mod.rs
index c6052817bfb6..66f6603f02fc 100644
--- a/arrow-data/src/transform/mod.rs
+++ b/arrow-data/src/transform/mod.rs
@@ -813,8 +813,8 @@ impl<'a> MutableArrayData<'a> {
         };
 
         let nulls = match data.data_type {
-            // RunEndEncoded and Null arrays cannot have top-level null bitmasks
-            DataType::RunEndEncoded(_, _) | DataType::Null => None,
+            // RunEndEncoded, Null, and Union arrays cannot have top-level null bitmasks
+            DataType::RunEndEncoded(_, _) | DataType::Null | DataType::Union(_, _) => None,
             _ => data
                 .null_buffer
                 .map(|nulls| {
diff --git a/arrow-data/src/transform/union.rs b/arrow-data/src/transform/union.rs
index f6f291e3f05d..d1301249d326 100644
--- a/arrow-data/src/transform/union.rs
+++ b/arrow-data/src/transform/union.rs
@@ -17,6 +17,7 @@
 
 use super::{_MutableArrayData, Extend};
 use crate::ArrayData;
+use arrow_schema::DataType;
 
 pub(super) fn build_extend_sparse(array: &ArrayData) -> Extend<'_> {
     let type_ids = array.buffer::<i8>(0);
@@ -68,10 +69,42 @@ pub(super) fn build_extend_dense(array: &ArrayData) -> Extend<'_> {
     )
 }
 
-pub(super) fn extend_nulls_dense(_mutable: &mut _MutableArrayData, _len: usize) {
-    panic!("cannot call extend_nulls on UnionArray as cannot infer type");
+pub(super) fn extend_nulls_dense(mutable: &mut _MutableArrayData, len: usize) {
+    let DataType::Union(fields, _) = &mutable.data_type else {
+        unreachable!()
+    };
+    let first_type_id = fields
+        .iter()
+        .next()
+        .expect("union must have at least one field")
+        .0;
+
+    // Extend type_ids buffer
+    mutable.buffer1.extend_from_slice(&vec![first_type_id; len]);
+
+    // Dense: extend offsets pointing into the first child, then extend nulls in that child
+    let child_offset = mutable.child_data[0].len();
+    let (start, end) = (child_offset as i32, (child_offset + len) as i32);
+    mutable.buffer2.extend(start..end);
+    mutable.child_data[0].extend_nulls(len);
 }
 
-pub(super) fn extend_nulls_sparse(_mutable: &mut _MutableArrayData, _len: usize) {
-    panic!("cannot call extend_nulls on UnionArray as cannot infer type");
+pub(super) fn extend_nulls_sparse(mutable: &mut _MutableArrayData, len: usize) {
+    let DataType::Union(fields, _) = &mutable.data_type else {
+        unreachable!()
+    };
+    let first_type_id = fields
+        .iter()
+        .next()
+        .expect("union must have at least one field")
+        .0;
+
+    // Extend type_ids buffer
+    mutable.buffer1.extend_from_slice(&vec![first_type_id; len]);
+
+    // Sparse: extend nulls in ALL children
+    mutable
+        .child_data
+        .iter_mut()
+        .for_each(|child| child.extend_nulls(len));
 }
diff --git a/arrow-flight/src/encode.rs b/arrow-flight/src/encode.rs
index 187de400f6c0..191da024136f 100644
--- a/arrow-flight/src/encode.rs
+++ b/arrow-flight/src/encode.rs
@@ -528,13 +528,28 @@ fn prepare_field_for_flight(
         }
         DataType::Dictionary(_, value_type) => {
             if !send_dictionaries {
-                Field::new(
+                // Recurse into value type to handle nested dicts being stripped
+                let value_field = Field::new(
                     field.name(),
                     value_type.as_ref().clone(),
                     field.is_nullable(),
+                );
+                prepare_field_for_flight(
+                    &Arc::new(value_field),
+                    dictionary_tracker,
+                    send_dictionaries,
                 )
                 .with_metadata(field.metadata().clone())
             } else {
+                // Recurse into value type BEFORE registering this dict's id,
+                // matching the depth-first order of encode_dictionaries in the
+                // IPC writer which processes nested dicts before the parent.
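+                // e.g. for Dict(Int8, Struct { Dict(Int32, Utf8), Int32 }),
+                // the nested Utf8 dictionary is registered first, and only
+                // then does this field take the next id from the tracker.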
+                let value_field = Field::new("values", value_type.as_ref().clone(), true);
+                prepare_field_for_flight(
+                    &Arc::new(value_field),
+                    dictionary_tracker,
+                    send_dictionaries,
+                );
                 dictionary_tracker.next_dict_id();
                 #[allow(deprecated)]
                 Field::new_dict(
@@ -547,6 +562,44 @@
             .with_metadata(field.metadata().clone())
             }
         }
+        DataType::ListView(inner) | DataType::LargeListView(inner) => {
+            let prepared = prepare_field_for_flight(inner, dictionary_tracker, send_dictionaries);
+            Field::new(
+                field.name(),
+                match field.data_type() {
+                    DataType::ListView(_) => DataType::ListView(Arc::new(prepared)),
+                    _ => DataType::LargeListView(Arc::new(prepared)),
+                },
+                field.is_nullable(),
+            )
+            .with_metadata(field.metadata().clone())
+        }
+        DataType::FixedSizeList(inner, size) => Field::new(
+            field.name(),
+            DataType::FixedSizeList(
+                Arc::new(prepare_field_for_flight(
+                    inner,
+                    dictionary_tracker,
+                    send_dictionaries,
+                )),
+                *size,
+            ),
+            field.is_nullable(),
+        )
+        .with_metadata(field.metadata().clone()),
+        DataType::RunEndEncoded(run_ends, values) => Field::new(
+            field.name(),
+            DataType::RunEndEncoded(
+                run_ends.clone(),
+                Arc::new(prepare_field_for_flight(
+                    values,
+                    dictionary_tracker,
+                    send_dictionaries,
+                )),
+            ),
+            field.is_nullable(),
+        )
+        .with_metadata(field.metadata().clone()),
         DataType::Map(inner, sorted) => Field::new(
             field.name(),
             DataType::Map(
@@ -556,7 +609,37 @@
             field.is_nullable(),
         )
         .with_metadata(field.metadata().clone()),
-        _ => field.as_ref().clone(),
+        DataType::Null
+        | DataType::Boolean
+        | DataType::Int8
+        | DataType::Int16
+        | DataType::Int32
+        | DataType::Int64
+        | DataType::UInt8
+        | DataType::UInt16
+        | DataType::UInt32
+        | DataType::UInt64
+        | DataType::Float16
+        | DataType::Float32
+        | DataType::Float64
+        | DataType::Timestamp(_, _)
+        | DataType::Date32
+        | DataType::Date64
+        | DataType::Time32(_)
+        | DataType::Time64(_)
+        | DataType::Duration(_)
+        | DataType::Interval(_)
+        | DataType::Binary
+        | DataType::FixedSizeBinary(_)
+        | DataType::LargeBinary
+        | DataType::BinaryView
+        | DataType::Utf8
+        | DataType::LargeUtf8
+        | DataType::Utf8View
+        | DataType::Decimal32(_, _)
+        | DataType::Decimal64(_, _)
+        | DataType::Decimal128(_, _)
+        | DataType::Decimal256(_, _) => field.as_ref().clone(),
     }
 }
 
@@ -573,33 +656,7 @@ fn prepare_schema_for_flight(
     let fields: Fields = schema
         .fields()
         .iter()
-        .map(|field| match field.data_type() {
-            DataType::Dictionary(_, value_type) => {
-                if !send_dictionaries {
-                    Field::new(
-                        field.name(),
-                        value_type.as_ref().clone(),
-                        field.is_nullable(),
-                    )
-                    .with_metadata(field.metadata().clone())
-                } else {
-                    dictionary_tracker.next_dict_id();
-                    #[allow(deprecated)]
-                    Field::new_dict(
-                        field.name(),
-                        field.data_type().clone(),
-                        field.is_nullable(),
-                        0,
-                        field.dict_is_ordered().unwrap_or_default(),
-                    )
-                    .with_metadata(field.metadata().clone())
-                }
-            }
-            tpe if tpe.is_nested() => {
-                prepare_field_for_flight(field, dictionary_tracker, send_dictionaries)
-            }
-            _ => field.as_ref().clone(),
-        })
+        .map(|field| prepare_field_for_flight(field, dictionary_tracker, send_dictionaries))
         .collect();
 
     Schema::new(fields).with_metadata(schema.metadata().clone())
@@ -729,7 +786,8 @@ fn hydrate_dictionary(array: &ArrayRef, data_type: &DataType) -> Result>();
+        let run_ends1 = Int32Array::from(vec![1, 2, 3]);
+        let arr1 = RunArray::try_new(&run_ends1, &dict_values1).unwrap();
+
+        let dict_values2 = vec![Some("c"), Some("a")]
+            .into_iter()
+            .collect::<DictionaryArray<Int32Type>>();
+        let run_ends2 = Int32Array::from(vec![1, 2]);
+        let arr2 = RunArray::try_new(&run_ends2, &dict_values2).unwrap();
+
+        let schema = Arc::new(Schema::new(vec![Field::new(
+            "ree",
+            arr1.data_type().clone(),
+            true,
+        )]));
+
+        let batch1 = RecordBatch::try_new(schema.clone(), vec![Arc::new(arr1)]).unwrap();
+        let batch2 = RecordBatch::try_new(schema, vec![Arc::new(arr2)]).unwrap();
+
+        verify_flight_round_trip(vec![batch1, batch2]).await;
+    }
+
+    #[tokio::test]
+    async fn test_dictionary_of_struct_of_dict_resend() {
+        // Dict(Int8, Struct { dict: Dict(Int32, Utf8), int: Int32 })
+        // This exercises the Dictionary branch recursing into its value type
+        // before assigning its own dict_id (depth-first ordering).
+        let struct_fields: Vec<Field> = vec![
+            Field::new_dictionary("dict", DataType::Int32, DataType::Utf8, true),
+            Field::new("int", DataType::Int32, false),
+        ];
+
+        let inner_values =
+            StringArray::from(vec![Some("alpha"), None, Some("beta"), Some("gamma")]);
+        let inner_keys = Int32Array::from_iter_values([0, 1, 2, 3, 0]);
+        let inner_dict = DictionaryArray::new(inner_keys, Arc::new(inner_values));
+        let int_array = Int32Array::from(vec![10, 20, 30, 40, 50]);
+
+        let struct_array = StructArray::from(vec![
+            (
+                Arc::new(struct_fields[0].clone()),
+                Arc::new(inner_dict) as ArrayRef,
+            ),
+            (
+                Arc::new(struct_fields[1].clone()),
+                Arc::new(int_array) as ArrayRef,
+            ),
+        ]);
+
+        let outer_keys = Int8Array::from_iter_values([0, 0, 1, 2]);
+        let arr1 = DictionaryArray::new(outer_keys, Arc::new(struct_array));
+
+        let inner_values2 = StringArray::from(vec![Some("x"), Some("y")]);
+        let inner_keys2 = Int32Array::from_iter_values([0, 1, 0]);
+        let inner_dict2 = DictionaryArray::new(inner_keys2, Arc::new(inner_values2));
+        let int_array2 = Int32Array::from(vec![100, 200, 300]);
+
+        let struct_array2 = StructArray::from(vec![
+            (
+                Arc::new(struct_fields[0].clone()),
+                Arc::new(inner_dict2) as ArrayRef,
+            ),
+            (
+                Arc::new(struct_fields[1].clone()),
+                Arc::new(int_array2) as ArrayRef,
+            ),
+        ]);
+
+        let outer_keys2 = Int8Array::from_iter_values([0, 1]);
+        let arr2 = DictionaryArray::new(outer_keys2, Arc::new(struct_array2));
+
+        let schema = Arc::new(Schema::new(vec![Field::new(
+            "dict_struct",
+            arr1.data_type().clone(),
+            false,
+        )]));
+
+        let batch1 = RecordBatch::try_new(schema.clone(), vec![Arc::new(arr1)]).unwrap();
+        let batch2 = RecordBatch::try_new(schema, vec![Arc::new(arr2)]).unwrap();
+
+        verify_flight_round_trip(vec![batch1, batch2]).await;
+    }
+
+    async fn verify_dictionary_list_view_resend<O: OffsetSizeTrait>() {
+        let mut builder =
+            GenericListViewBuilder::<O, _>::new(StringDictionaryBuilder::<UInt16Type>::new());
+
+        builder.append_value(vec![Some("a"), None, Some("b")]);
+        let arr1 = builder.finish();
+
+        builder.append_value(vec![Some("c"), None, Some("d")]);
+        let arr2 = builder.finish();
+
+        let inner = Arc::new(Field::new_dictionary(
+            "item",
+            DataType::UInt16,
+            DataType::Utf8,
+            true,
+        ));
+        let dt = if O::IS_LARGE {
+            DataType::LargeListView(inner)
+        } else {
+            DataType::ListView(inner)
+        };
+        let schema = Arc::new(Schema::new(vec![Field::new("dict_list_view", dt, true)]));
+
+        let batch1 = RecordBatch::try_new(schema.clone(), vec![Arc::new(arr1)]).unwrap();
+        let batch2 = RecordBatch::try_new(schema, vec![Arc::new(arr2)]).unwrap();
+
+        verify_flight_round_trip(vec![batch1, batch2]).await;
+    }
+
+    #[tokio::test]
+    async fn test_dictionary_list_view_resend() {
+        verify_dictionary_list_view_resend::<i32>().await;
+    }
+
+    #[tokio::test]
+    async fn test_dictionary_large_list_view_resend() {
+        verify_dictionary_list_view_resend::<i64>().await;
+    }
+
+    #[tokio::test]
+    async fn test_dictionary_fixed_size_list_resend() {
+        let mut builder =
+            FixedSizeListBuilder::new(StringDictionaryBuilder::<UInt16Type>::new(), 2);
+
+        builder.values().append_value("a");
+        builder.values().append_value("b");
+        builder.append(true);
+        let arr1 = builder.finish();
+
+        builder.values().append_value("c");
+        builder.values().append_value("d");
+        builder.append(true);
+        let arr2 = builder.finish();
+
+        let schema = Arc::new(Schema::new(vec![Field::new_fixed_size_list(
+            "dict_fsl",
+            Field::new_dictionary("item", DataType::UInt16, DataType::Utf8, true),
+            2,
+            true,
+        )]));
+
+        let batch1 = RecordBatch::try_new(schema.clone(), vec![Arc::new(arr1)]).unwrap();
+        let batch2 = RecordBatch::try_new(schema, vec![Arc::new(arr2)]).unwrap();
+
+        verify_flight_round_trip(vec![batch1, batch2]).await;
+    }
+
     async fn verify_flight_round_trip(mut batches: Vec<RecordBatch>) {
         let expected_schema = batches.first().unwrap().schema();
diff --git a/arrow-ipc/Cargo.toml b/arrow-ipc/Cargo.toml
index 943852ffdec9..ae603d2acd93 100644
--- a/arrow-ipc/Cargo.toml
+++ b/arrow-ipc/Cargo.toml
@@ -42,7 +42,7 @@ arrow-data = { workspace = true }
 arrow-schema = { workspace = true }
 arrow-select = { workspace = true}
 flatbuffers = { version = "25.2.10", default-features = false }
-lz4_flex = { version = "0.12", default-features = false, features = ["std", "frame"], optional = true }
+lz4_flex = { version = "0.13", default-features = false, features = ["std", "frame"], optional = true }
 zstd = { version = "0.13.0", default-features = false, optional = true }
 
 [features]
diff --git a/arrow-ipc/src/writer.rs b/arrow-ipc/src/writer.rs
index 23217fec6dfe..9d3fca293874 100644
--- a/arrow-ipc/src/writer.rs
+++ b/arrow-ipc/src/writer.rs
@@ -417,7 +417,12 @@ impl IpcDataGenerator {
         // sequence is assigned depth-first, so we need to first encode children and have
         // them take their assigned dict IDs before we take the dict ID for this field.
         let dict_id = dict_id_seq.next().ok_or_else(|| {
-            ArrowError::IpcError(format!("no dict id for field {}", field.name()))
+            ArrowError::IpcError(format!(
+                "no dict id for field {:?}: field.data_type={:?}, column.data_type={:?}",
+                field.name(),
+                field.data_type(),
+                column.data_type()
+            ))
         })?;
 
         match dictionary_tracker.insert_column(
diff --git a/arrow-json/Cargo.toml b/arrow-json/Cargo.toml
index 5fcde480eb6d..851f0a244f53 100644
--- a/arrow-json/Cargo.toml
+++ b/arrow-json/Cargo.toml
@@ -61,11 +61,16 @@ tokio = { version = "1.27", default-features = false, features = ["io-util"] }
 bytes = "1.4"
 criterion = { workspace = true, default-features = false }
 rand = { version = "0.9", default-features = false, features = ["std", "std_rng", "thread_rng"] }
+arbitrary = { version = "1.4.2", features = ["derive"] }
 
 [[bench]]
 name = "serde"
 harness = false
 
 [[bench]]
-name = "json-reader"
+name = "json_reader"
 harness = false
+
+[[bench]]
+name = "json_writer"
+harness = false
diff --git a/arrow-json/benches/json-reader.rs b/arrow-json/benches/json-reader.rs
deleted file mode 100644
index 504839f8ffe2..000000000000
--- a/arrow-json/benches/json-reader.rs
+++ /dev/null
@@ -1,250 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-use arrow_json::ReaderBuilder;
-use arrow_json::reader::Decoder;
-use arrow_schema::{DataType, Field, Schema};
-use criterion::{
-    BenchmarkId, Criterion, SamplingMode, Throughput, criterion_group, criterion_main,
-};
-use serde_json::{Map, Number, Value};
-use std::fmt::Write;
-use std::hint::black_box;
-use std::sync::Arc;
-
-const ROWS: usize = 1 << 17; // 128K rows
-const BATCH_SIZE: usize = 1 << 13; // 8K rows per batch
-
-const WIDE_FIELDS: usize = 64;
-const BINARY_BYTES: usize = 64;
-const WIDE_PROJECTION_TOTAL_FIELDS: usize = 100; // 100 fields total, select only 3
-
-fn decode_and_flush(decoder: &mut Decoder, data: &[u8]) {
-    let mut offset = 0;
-    while offset < data.len() {
-        let read = decoder.decode(black_box(&data[offset..])).unwrap();
-        if read == 0 {
-            break;
-        }
-        offset += read;
-        while let Some(_batch) = decoder.flush().unwrap() {}
-    }
-}
-
-fn build_schema(field_count: usize) -> Arc<Schema> {
-    // Builds a schema with fields named f0..f{field_count-1}, all Int64 and non-nullable.
-    let fields: Vec<Field> = (0..field_count)
-        .map(|i| Field::new(format!("f{i}"), DataType::Int64, false))
-        .collect();
-    Arc::new(Schema::new(fields))
-}
-
-fn build_projection_schema(indices: &[usize]) -> Arc<Schema> {
-    let fields: Vec<Field> = indices
-        .iter()
-        .map(|i| Field::new(format!("f{i}"), DataType::Int64, false))
-        .collect();
-    Arc::new(Schema::new(fields))
-}
-
-fn build_wide_json(rows: usize, fields: usize) -> Vec<u8> {
-    // Builds newline-delimited JSON objects with "wide" schema.
-    // Example (rows=2, fields=3):
-    //   {"f0":0,"f1":1,"f2":2}
-    //   {"f0":1,"f1":2,"f2":3}
-    let mut out = String::with_capacity(rows * fields * 12);
-    for row in 0..rows {
-        out.push('{');
-        for field in 0..fields {
-            if field > 0 {
-                out.push(',');
-            }
-            let value = row as i64 + field as i64;
-            write!(&mut out, "\"f{field}\":{value}").unwrap();
-        }
-        out.push('}');
-        out.push('\n');
-    }
-    out.into_bytes()
-}
-
-fn build_wide_values(rows: usize, fields: usize) -> Vec<Value> {
-    // Mirrors build_wide_json but returns structured serde_json::Value objects.
-    let mut out = Vec::with_capacity(rows);
-    for row in 0..rows {
-        let mut map = Map::with_capacity(fields);
-        for field in 0..fields {
-            let key = format!("f{field}");
-            let value = Number::from((row + field) as i64);
-            map.insert(key, Value::Number(value));
-        }
-        out.push(Value::Object(map));
-    }
-    out
-}
-
-fn bench_decode_wide_object(c: &mut Criterion) {
-    let data = build_wide_json(ROWS, WIDE_FIELDS);
-    let schema = build_schema(WIDE_FIELDS);
-
-    c.bench_function("decode_wide_object_i64_json", |b| {
-        b.iter(|| {
-            let mut decoder = ReaderBuilder::new(schema.clone())
-                .with_batch_size(BATCH_SIZE)
-                .build_decoder()
-                .unwrap();
-            decode_and_flush(&mut decoder, &data);
-        })
-    });
-}
-
-fn bench_serialize_wide_object(c: &mut Criterion) {
-    let values = build_wide_values(ROWS, WIDE_FIELDS);
-    let schema = build_schema(WIDE_FIELDS);
-
-    c.bench_function("decode_wide_object_i64_serialize", |b| {
-        b.iter(|| {
-            let mut decoder = ReaderBuilder::new(schema.clone())
-                .with_batch_size(BATCH_SIZE)
-                .build_decoder()
-                .unwrap();
-
-            decoder.serialize(&values).unwrap();
-            while let Some(_batch) = decoder.flush().unwrap() {}
-        })
-    });
-}
-
-fn bench_decode_binary(c: &mut Criterion, name: &str, data: &[u8], field: Arc<Field>) {
-    c.bench_function(name, |b| {
-        b.iter(|| {
-            let mut decoder = ReaderBuilder::new_with_field(field.clone())
-                .with_batch_size(BATCH_SIZE)
-                .build_decoder()
-                .unwrap();
-            decode_and_flush(&mut decoder, data);
-        })
-    });
-}
-
-#[inline]
-fn append_hex_byte(buf: &mut String, byte: u8) {
-    const HEX: &[u8; 16] = b"0123456789abcdef";
-    buf.push(HEX[(byte >> 4) as usize] as char);
-    buf.push(HEX[(byte & 0x0f) as usize] as char);
-}
-
-fn build_hex_lines(rows: usize, bytes_per_row: usize) -> Vec<u8> {
-    let mut data = String::with_capacity(rows * (bytes_per_row * 2 + 3));
-    for row in 0..rows {
-        data.push('"');
-        for i in 0..bytes_per_row {
-            let byte = ((row + i) & 0xff) as u8;
-            append_hex_byte(&mut data, byte);
-        }
-        data.push('"');
-        data.push('\n');
-    }
-    data.into_bytes()
-}
-
-fn bench_binary_hex(c: &mut Criterion) {
-    let binary_data = build_hex_lines(ROWS, BINARY_BYTES);
-
-    let binary_field = Arc::new(Field::new("item", DataType::Binary, false));
-    bench_decode_binary(c, "decode_binary_hex_json", &binary_data, binary_field);
-
-    let fixed_field = Arc::new(Field::new(
-        "item",
-        DataType::FixedSizeBinary(BINARY_BYTES as i32),
-        false,
-    ));
-    bench_decode_binary(c, "decode_fixed_binary_hex_json", &binary_data, fixed_field);
-
-    let view_field = Arc::new(Field::new("item", DataType::BinaryView, false));
-    bench_decode_binary(c, "decode_binary_view_hex_json", &binary_data, view_field);
-}
-
-fn bench_decode_schema(c: &mut Criterion, name: &str, data: &[u8], schema: Arc<Schema>) {
-    let mut group = c.benchmark_group(name);
-    group.throughput(Throughput::Bytes(data.len() as u64));
-    group.sample_size(50);
-    group.measurement_time(std::time::Duration::from_secs(5));
-    group.warm_up_time(std::time::Duration::from_secs(2));
-    group.sampling_mode(SamplingMode::Flat);
-    group.bench_function(BenchmarkId::from_parameter(ROWS), |b| {
-        b.iter(|| {
-            let mut decoder = ReaderBuilder::new(schema.clone())
-                .with_batch_size(BATCH_SIZE)
-                .build_decoder()
-                .unwrap();
-            decode_and_flush(&mut decoder, data);
-        })
-    });
-    group.finish();
-}
-
-fn build_wide_projection_json(rows: usize, total_fields: usize) -> Vec<u8> {
-    // Estimate: each field ~15 bytes ("fXX":VVVVVVV,), total ~15*100 + overhead
-    let per_row_size = total_fields * 15 + 10;
-    let mut data = String::with_capacity(rows * per_row_size);
-
-    for _row in 0..rows {
-        data.push('{');
-        for i in 0..total_fields {
-            if i > 0 {
-                data.push(',');
-            }
-            // Use fixed-width values for stable benchmarks: 7 digits
-            let _ = write!(data, "\"f{}\":{:07}", i, i);
-        }
-        data.push('}');
-        data.push('\n');
-    }
-    data.into_bytes()
-}
-
-fn bench_wide_projection(c: &mut Criterion) {
-    // Wide projection workload: tests overhead of parsing unused fields
-    let wide_projection_data = build_wide_projection_json(ROWS, WIDE_PROJECTION_TOTAL_FIELDS);
-
-    let full_schema = build_schema(WIDE_PROJECTION_TOTAL_FIELDS);
-    bench_decode_schema(
-        c,
-        "decode_wide_projection_full_json",
-        &wide_projection_data,
-        full_schema,
-    );
-
-    // Projected schema: only 3 fields (f0, f10, f50) out of 100
-    let projected_schema = build_projection_schema(&[0, 10, 50]);
-    bench_decode_schema(
-        c,
-        "decode_wide_projection_narrow_json",
-        &wide_projection_data,
-        projected_schema,
-    );
-}
-
-criterion_group!(
-    benches,
-    bench_decode_wide_object,
-    bench_serialize_wide_object,
-    bench_binary_hex,
-    bench_wide_projection
-);
-criterion_main!(benches);
diff --git a/arrow-json/benches/json_reader.rs b/arrow-json/benches/json_reader.rs
new file mode 100644
index 000000000000..9d0dd8e9a108
--- /dev/null
+++ b/arrow-json/benches/json_reader.rs
@@ -0,0 +1,580 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use arbitrary::{Arbitrary, Unstructured};
+use arrow_json::ReaderBuilder;
+use arrow_json::reader::{Decoder, infer_json_schema};
+use arrow_schema::{DataType, Field, Schema};
+use criterion::{
+    BenchmarkId, Criterion, SamplingMode, Throughput, criterion_group, criterion_main,
+};
+use serde::Serialize;
+use serde_json::{Map, Number, Value};
+use std::fmt::Write;
+use std::hint::black_box;
+use std::sync::Arc;
+
+// Shared
+const ROWS: usize = 1 << 17; // 128K rows
+const BATCH_SIZE: usize = 1 << 13; // 8K rows per batch
+
+// Wide object / struct
+const WIDE_FIELDS: usize = 64;
+const WIDE_PROJECTION_TOTAL_FIELDS: usize = 100; // 100 fields total, select only 3
+
+// Binary
+const BINARY_BYTES: usize = 64;
+
+// List
+const SHORT_LIST_ELEMENTS: usize = 5;
+const LONG_LIST_ELEMENTS: usize = 100;
+
+// Map
+const SMALL_MAP_ENTRIES: usize = 5;
+const LARGE_MAP_ENTRIES: usize = 50;
+
+// Run-end encoded
+const SHORT_REE_RUN_LENGTH: usize = 2;
+const LONG_REE_RUN_LENGTH: usize = 100;
+
+fn decode_and_flush(decoder: &mut Decoder, data: &[u8]) {
+    let mut offset = 0;
+    while offset < data.len() {
+        let read = decoder.decode(black_box(&data[offset..])).unwrap();
+        if read == 0 {
+            break;
+        }
+        offset += read;
+        while let Some(_batch) = decoder.flush().unwrap() {}
+    }
+}
+
+fn build_schema(field_count: usize) -> Arc<Schema> {
+    // Builds a schema with fields named f0..f{field_count-1}, all Int64 and non-nullable.
+    let fields: Vec<Field> = (0..field_count)
+        .map(|i| Field::new(format!("f{i}"), DataType::Int64, false))
+        .collect();
+    Arc::new(Schema::new(fields))
+}
+
+fn build_projection_schema(indices: &[usize]) -> Arc<Schema> {
+    let fields: Vec<Field> = indices
+        .iter()
+        .map(|i| Field::new(format!("f{i}"), DataType::Int64, false))
+        .collect();
+    Arc::new(Schema::new(fields))
+}
+
+fn build_wide_json(rows: usize, fields: usize) -> Vec<u8> {
+    // Builds newline-delimited JSON objects with "wide" schema.
+    // Example (rows=2, fields=3):
+    //   {"f0":0,"f1":1,"f2":2}
+    //   {"f0":1,"f1":2,"f2":3}
+    let mut out = String::with_capacity(rows * fields * 12);
+    for row in 0..rows {
+        out.push('{');
+        for field in 0..fields {
+            if field > 0 {
+                out.push(',');
+            }
+            let value = row as i64 + field as i64;
+            write!(&mut out, "\"f{field}\":{value}").unwrap();
+        }
+        out.push('}');
+        out.push('\n');
+    }
+    out.into_bytes()
+}
+
+fn build_wide_values(rows: usize, fields: usize) -> Vec<Value> {
+    // Mirrors build_wide_json but returns structured serde_json::Value objects.
+    let mut out = Vec::with_capacity(rows);
+    for row in 0..rows {
+        let mut map = Map::with_capacity(fields);
+        for field in 0..fields {
+            let key = format!("f{field}");
+            let value = Number::from((row + field) as i64);
+            map.insert(key, Value::Number(value));
+        }
+        out.push(Value::Object(map));
+    }
+    out
+}
+
+fn bench_decode_wide_object(c: &mut Criterion) {
+    let data = build_wide_json(ROWS, WIDE_FIELDS);
+    let schema = build_schema(WIDE_FIELDS);
+
+    c.bench_function("decode_wide_object_i64_json", |b| {
+        b.iter(|| {
+            let mut decoder = ReaderBuilder::new(schema.clone())
+                .with_batch_size(BATCH_SIZE)
+                .build_decoder()
+                .unwrap();
+            decode_and_flush(&mut decoder, &data);
+        })
+    });
+}
+
+fn bench_serialize_wide_object(c: &mut Criterion) {
+    let values = build_wide_values(ROWS, WIDE_FIELDS);
+    let schema = build_schema(WIDE_FIELDS);
+
+    c.bench_function("decode_wide_object_i64_serialize", |b| {
+        b.iter(|| {
+            let mut decoder = ReaderBuilder::new(schema.clone())
+                .with_batch_size(BATCH_SIZE)
+                .build_decoder()
+                .unwrap();
+
+            decoder.serialize(&values).unwrap();
+            while let Some(_batch) = decoder.flush().unwrap() {}
+        })
+    });
+}
+
+fn bench_decode_binary(c: &mut Criterion, name: &str, data: &[u8], field: Arc<Field>) {
+    c.bench_function(name, |b| {
+        b.iter(|| {
+            let mut decoder = ReaderBuilder::new_with_field(field.clone())
+                .with_batch_size(BATCH_SIZE)
+                .build_decoder()
+                .unwrap();
+            decode_and_flush(&mut decoder, data);
+        })
+    });
+}
+
+#[inline]
+fn append_hex_byte(buf: &mut String, byte: u8) {
+    const HEX: &[u8; 16] = b"0123456789abcdef";
+    buf.push(HEX[(byte >> 4) as usize] as char);
+    buf.push(HEX[(byte & 0x0f) as usize] as char);
+}
+
+fn build_hex_lines(rows: usize, bytes_per_row: usize) -> Vec<u8> {
+    let mut data = String::with_capacity(rows * (bytes_per_row * 2 + 3));
+    for row in 0..rows {
+        data.push('"');
+        for i in 0..bytes_per_row {
+            let byte = ((row + i) & 0xff) as u8;
+            append_hex_byte(&mut data, byte);
+        }
+        data.push('"');
+        data.push('\n');
+    }
+    data.into_bytes()
+}
+
+fn bench_binary_hex(c: &mut Criterion) {
+    let binary_data = build_hex_lines(ROWS, BINARY_BYTES);
+
+    let binary_field = Arc::new(Field::new("item", DataType::Binary, false));
+    bench_decode_binary(c, "decode_binary_hex_json", &binary_data, binary_field);
+
+    let fixed_field = Arc::new(Field::new(
+        "item",
+        DataType::FixedSizeBinary(BINARY_BYTES as i32),
+        false,
+    ));
+    bench_decode_binary(c, "decode_fixed_binary_hex_json", &binary_data, fixed_field);
+
+    let view_field = Arc::new(Field::new("item", DataType::BinaryView, false));
+    bench_decode_binary(c, "decode_binary_view_hex_json", &binary_data, view_field);
+}
+
+fn bench_decode_schema(c: &mut Criterion, name: &str, data: &[u8], schema: Arc<Schema>) {
+    let mut group = c.benchmark_group(name);
+    group.throughput(Throughput::Bytes(data.len() as u64));
+    group.sample_size(50);
+    group.measurement_time(std::time::Duration::from_secs(5));
+    group.warm_up_time(std::time::Duration::from_secs(2));
+    group.sampling_mode(SamplingMode::Flat);
+    group.bench_function(BenchmarkId::from_parameter(ROWS), |b| {
+        b.iter(|| {
+            let mut decoder = ReaderBuilder::new(schema.clone())
+                .with_batch_size(BATCH_SIZE)
+                .build_decoder()
+                .unwrap();
+            decode_and_flush(&mut decoder, data);
+        })
+    });
+    group.finish();
+}
+
+fn build_wide_projection_json(rows: usize, total_fields: usize) -> Vec<u8> {
+    // Estimate: each field ~15 bytes ("fXX":VVVVVVV,), total ~15*100 + overhead
+    let per_row_size = total_fields * 15 + 10;
+    let mut data = String::with_capacity(rows * per_row_size);
+
+    for _row in 0..rows {
+        data.push('{');
+        for i in 0..total_fields {
+            if i > 0 {
+                data.push(',');
+            }
+            // Use fixed-width values for stable benchmarks: 7 digits
+            let _ = write!(data, "\"f{}\":{:07}", i, i);
+        }
+        data.push('}');
+        data.push('\n');
+    }
+    data.into_bytes()
+}
+
+fn bench_wide_projection(c: &mut Criterion) {
+    // Wide projection workload: tests overhead of parsing unused fields
+    let wide_projection_data = build_wide_projection_json(ROWS, WIDE_PROJECTION_TOTAL_FIELDS);
+
+    let full_schema = build_schema(WIDE_PROJECTION_TOTAL_FIELDS);
+    bench_decode_schema(
+        c,
+        "decode_wide_projection_full_json",
+        &wide_projection_data,
+        full_schema,
+    );
+
+    // Projected schema: only 3 fields (f0, f10, f50) out of 100
+    let projected_schema = build_projection_schema(&[0, 10, 50]);
+    bench_decode_schema(
+        c,
+        "decode_wide_projection_narrow_json",
+        &wide_projection_data,
+        projected_schema,
+    );
+}
+
+fn build_list_json(rows: usize, elements: usize) -> Vec<u8> {
+    // Builds newline-delimited JSON objects with a single list field.
+    // Example (rows=2, elements=3):
+    //   {"list":[0,1,2]}
+    //   {"list":[1,2,3]}
+    let mut out = String::with_capacity(rows * (elements * 6 + 16));
+    for row in 0..rows {
+        out.push_str("{\"list\":[");
+        for i in 0..elements {
+            if i > 0 {
+                out.push(',');
+            }
+            write!(&mut out, "{}", (row + i) as i64).unwrap();
+        }
+        out.push_str("]}\n");
+    }
+    out.into_bytes()
+}
+
+fn build_list_values(rows: usize, elements: usize) -> Vec<Value> {
+    // Mirrors build_list_json but returns structured serde_json::Value objects.
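+    // Used by the serialize benchmarks, which feed these values to
+    // Decoder::serialize directly rather than parsing raw JSON bytes.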
+    let mut out = Vec::with_capacity(rows);
+    for row in 0..rows {
+        let arr: Vec<Value> = (0..elements)
+            .map(|i| Value::Number(Number::from((row + i) as i64)))
+            .collect();
+        let mut map = Map::with_capacity(1);
+        map.insert("list".to_string(), Value::Array(arr));
+        out.push(Value::Object(map));
+    }
+    out
+}
+
+fn build_list_schema() -> Arc<Schema> {
+    Arc::new(Schema::new(vec![Field::new(
+        "list",
+        DataType::List(Arc::new(Field::new_list_field(DataType::Int64, false))),
+        false,
+    )]))
+}
+
+fn bench_decode_list(c: &mut Criterion) {
+    let schema = build_list_schema();
+
+    // Short lists: tests list handling overhead (few elements per row)
+    let short_data = build_list_json(ROWS, SHORT_LIST_ELEMENTS);
+    bench_decode_schema(c, "decode_short_list_i64_json", &short_data, schema.clone());
+
+    // Long lists: tests child element decode throughput (many elements per row)
+    let long_data = build_list_json(ROWS, LONG_LIST_ELEMENTS);
+    bench_decode_schema(c, "decode_long_list_i64_json", &long_data, schema);
+}
+
+fn bench_serialize_list(c: &mut Criterion) {
+    let schema = build_list_schema();
+
+    let short_values = build_list_values(ROWS, SHORT_LIST_ELEMENTS);
+    c.bench_function("decode_short_list_i64_serialize", |b| {
+        b.iter(|| {
+            let mut decoder = ReaderBuilder::new(schema.clone())
+                .with_batch_size(BATCH_SIZE)
+                .build_decoder()
+                .unwrap();
+            decoder.serialize(&short_values).unwrap();
+            while let Some(_batch) = decoder.flush().unwrap() {}
+        })
+    });
+
+    let long_values = build_list_values(ROWS, LONG_LIST_ELEMENTS);
+    c.bench_function("decode_long_list_i64_serialize", |b| {
+        b.iter(|| {
+            let mut decoder = ReaderBuilder::new(schema.clone())
+                .with_batch_size(BATCH_SIZE)
+                .build_decoder()
+                .unwrap();
+            decoder.serialize(&long_values).unwrap();
+            while let Some(_batch) = decoder.flush().unwrap() {}
+        })
+    });
+}
+
+fn build_map_json(rows: usize, entries: usize) -> Vec<u8> {
+    let mut out = String::with_capacity(rows * (entries * 20 + 16));
+    for row in 0..rows {
+        out.push_str("{\"map\":{");
+        for i in 0..entries {
+            if i > 0 {
+                out.push(',');
+            }
+            write!(&mut out, "\"k{}\":{}", i, (row + i) as i64).unwrap();
+        }
+        out.push_str("}}\n");
+    }
+    out.into_bytes()
+}
+
+fn build_map_values(rows: usize, entries: usize) -> Vec<Value> {
+    let mut out = Vec::with_capacity(rows);
+    for row in 0..rows {
+        let mut inner = Map::with_capacity(entries);
+        for i in 0..entries {
+            inner.insert(
+                format!("k{i}"),
+                Value::Number(Number::from((row + i) as i64)),
+            );
+        }
+        let mut map = Map::with_capacity(1);
+        map.insert("map".to_string(), Value::Object(inner));
+        out.push(Value::Object(map));
+    }
+    out
+}
+
+fn build_map_schema() -> Arc<Schema> {
+    let entries_field = Arc::new(Field::new(
+        "entries",
+        DataType::Struct(
+            vec![
+                Field::new("keys", DataType::Utf8, false),
+                Field::new("values", DataType::Int64, true),
+            ]
+            .into(),
+        ),
+        false,
+    ));
+    Arc::new(Schema::new(vec![Field::new(
+        "map",
+        DataType::Map(entries_field, false),
+        false,
+    )]))
+}
+
+fn bench_decode_map(c: &mut Criterion) {
+    let schema = build_map_schema();
+
+    let small_data = build_map_json(ROWS, SMALL_MAP_ENTRIES);
+    bench_decode_schema(c, "decode_small_map_json", &small_data, schema.clone());
+
+    let large_data = build_map_json(ROWS, LARGE_MAP_ENTRIES);
+    bench_decode_schema(c, "decode_large_map_json", &large_data, schema);
+}
+
+fn bench_serialize_map(c: &mut Criterion) {
+    let schema = build_map_schema();
+
+    let small_values = build_map_values(ROWS, SMALL_MAP_ENTRIES);
+    c.bench_function("decode_small_map_serialize", |b| {
+        b.iter(|| {
+            let mut decoder = ReaderBuilder::new(schema.clone())
+                .with_batch_size(BATCH_SIZE)
+                .build_decoder()
+                .unwrap();
+            decoder.serialize(&small_values).unwrap();
+            while let Some(_batch) = decoder.flush().unwrap() {}
+        })
+    });
+
+    let large_values = build_map_values(ROWS, LARGE_MAP_ENTRIES);
+    c.bench_function("decode_large_map_serialize", |b| {
+        b.iter(|| {
+            let mut decoder = ReaderBuilder::new(schema.clone())
+                .with_batch_size(BATCH_SIZE)
+                .build_decoder()
+                .unwrap();
+            decoder.serialize(&large_values).unwrap();
+            while let Some(_batch) = decoder.flush().unwrap() {}
+        })
+    });
+}
+
+fn build_ree_json(rows: usize, run_length: usize) -> Vec<u8> {
+    let mut out = String::with_capacity(rows * 24);
+    for row in 0..rows {
+        let value = (row / run_length) as i64;
+        writeln!(&mut out, "{{\"val\":{value}}}").unwrap();
+    }
+    out.into_bytes()
+}
+
+fn build_ree_values(rows: usize, run_length: usize) -> Vec<Value> {
+    let mut out = Vec::with_capacity(rows);
+    for row in 0..rows {
+        let value = (row / run_length) as i64;
+        let mut map = Map::with_capacity(1);
+        map.insert("val".to_string(), Value::Number(Number::from(value)));
+        out.push(Value::Object(map));
+    }
+    out
+}
+
+fn build_ree_schema() -> Arc<Schema> {
+    let ree_type = DataType::RunEndEncoded(
+        Arc::new(Field::new("run_ends", DataType::Int32, false)),
+        Arc::new(Field::new("values", DataType::Int64, true)),
+    );
+    Arc::new(Schema::new(vec![Field::new("val", ree_type, false)]))
+}
+
+fn bench_decode_ree(c: &mut Criterion) {
+    let schema = build_ree_schema();
+
+    let short_data = build_ree_json(ROWS, SHORT_REE_RUN_LENGTH);
+    bench_decode_schema(c, "decode_short_ree_runs_json", &short_data, schema.clone());
+
+    let long_data = build_ree_json(ROWS, LONG_REE_RUN_LENGTH);
+    bench_decode_schema(c, "decode_long_ree_runs_json", &long_data, schema);
+}
+
+fn bench_serialize_ree(c: &mut Criterion) {
+    let schema = build_ree_schema();
+
+    let short_values = build_ree_values(ROWS, SHORT_REE_RUN_LENGTH);
+    c.bench_function("decode_short_ree_runs_serialize", |b| {
+        b.iter(|| {
+            let mut decoder = ReaderBuilder::new(schema.clone())
+                .with_batch_size(BATCH_SIZE)
+                .build_decoder()
+                .unwrap();
+            decoder.serialize(&short_values).unwrap();
+            while let Some(_batch) = decoder.flush().unwrap() {}
+        })
+    });
+
+    let long_values = build_ree_values(ROWS, LONG_REE_RUN_LENGTH);
+    c.bench_function("decode_long_ree_runs_serialize", |b| {
+        b.iter(|| {
+            let mut decoder = ReaderBuilder::new(schema.clone())
+                .with_batch_size(BATCH_SIZE)
+                .build_decoder()
+                .unwrap();
+            decoder.serialize(&long_values).unwrap();
+            while let Some(_batch) = decoder.flush().unwrap() {}
+        })
+    });
+}
+
+fn bench_schema_inference(c: &mut Criterion) {
+    const ROWS: usize = 1000;
+
+    // Note: the scalar field types of Row/Inner below are a reconstruction;
+    // the original generic parameters were lost in extraction.
+    #[derive(Serialize, Arbitrary, Debug)]
+    struct Row {
+        a: Option<i64>,
+        b: Option<String>,
+        c: Option<[i16; 8]>,
+        d: Option<[bool; 8]>,
+        e: Option<Inner>,
+        f: f64,
+    }
+
+    #[derive(Serialize, Arbitrary, Debug)]
+    struct Inner {
+        a: Option<i64>,
+        b: Option<bool>,
+        c: Option<String>,
+    }
+
+    let mut data = vec![];
+    for row in pseudorandom_sequence::<Row>(ROWS) {
+        serde_json::to_writer(&mut data, &row).unwrap();
+        data.push(b'\n');
+    }
+
+    let mut group = c.benchmark_group("infer_json_schema");
+    group.throughput(Throughput::Bytes(data.len() as u64));
+    group.sample_size(50);
+    group.measurement_time(std::time::Duration::from_secs(5));
+    group.warm_up_time(std::time::Duration::from_secs(2));
+    group.sampling_mode(SamplingMode::Flat);
+    group.bench_function(BenchmarkId::from_parameter(ROWS), |b| {
+        b.iter(|| infer_json_schema(black_box(&data[..]), None).unwrap())
+    });
+    group.finish();
+}
+
+fn pseudorandom_sequence<T: for<'a> Arbitrary<'a>>(len: usize) -> Vec<T> {
+    static RAND_BYTES: &[u8; 255] = &[
+        12, 135, 254, 243, 18, 5, 38, 175, 60, 58, 204, 103, 15, 88, 201, 199, 57, 63, 56, 234,
+        106, 111, 238, 119, 214, 50, 110, 89, 129, 185, 112, 115, 35, 239, 188, 189, 49, 184, 194,
+        146, 108, 131, 213, 43, 236, 81, 61, 20, 21, 52, 223, 220, 215, 74, 210, 27, 190, 107, 174,
+        142, 237, 66, 75, 1, 53, 181, 82, 158, 68, 134, 176, 229, 157, 116, 233, 153, 84, 139, 151,
+        8, 171, 59, 105, 242, 40, 69, 94, 170, 4, 187, 212, 156, 65, 90, 192, 216, 29, 222, 122,
+        230, 198, 154, 155, 245, 45, 178, 123, 23, 117, 168, 149, 17, 177, 48, 54, 241, 202, 44,
+        232, 64, 221, 252, 161, 91, 93, 143, 240, 102, 172, 209, 224, 186, 197, 219, 247, 71, 36,
+        101, 133, 113, 6, 137, 231, 162, 31, 7, 22, 138, 47, 136, 2, 244, 141, 173, 99, 25, 95, 96,
+        85, 249, 42, 251, 217, 16, 205, 98, 203, 92, 114, 14, 163, 150, 144, 10, 125, 13, 195, 72,
+        41, 67, 246, 11, 77, 132, 83, 37, 24, 183, 226, 250, 109, 248, 33, 76, 9, 55, 159, 34, 62,
+        196, 87, 3, 39, 28, 166, 167, 255, 206, 79, 191, 228, 193, 179, 97, 182, 148, 73, 120, 211,
+        253, 70, 227, 51, 169, 130, 145, 218, 78, 180, 165, 46, 127, 152, 26, 140, 207, 19, 100,
+        104, 80, 164, 126, 118, 200, 128, 86, 160, 32, 30, 225, 147, 124, 121, 235, 208,
+    ];
+
+    let bytes: Vec<u8> = RAND_BYTES
+        .iter()
+        .flat_map(|i| RAND_BYTES.map(|j| i.wrapping_add(j)))
+        .take(1000 * len)
+        .collect();
+
+    let mut u = Unstructured::new(&bytes);
+
+    (0..len)
+        .map(|_| u.arbitrary::<T>().unwrap())
+        .take(len)
+        .collect()
+}
+
+criterion_group!(
+    benches,
+    bench_decode_wide_object,
+    bench_serialize_wide_object,
+    bench_binary_hex,
+    bench_wide_projection,
+    bench_decode_list,
+    bench_serialize_list,
+    bench_decode_map,
+    bench_serialize_map,
+    bench_decode_ree,
+    bench_serialize_ree,
+    bench_schema_inference
+);
+criterion_main!(benches);
diff --git a/arrow-json/benches/json_writer.rs b/arrow-json/benches/json_writer.rs
new file mode 100644
index 000000000000..b37ea542efee
--- /dev/null
+++ b/arrow-json/benches/json_writer.rs
@@ -0,0 +1,129 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
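+
+//! Benchmarks for the arrow-json writer, covering `List` and `FixedSizeList`
+//! columns of `Int64` with short and long rows.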
+ +use arrow_array::builder::{FixedSizeListBuilder, Int64Builder, ListBuilder}; +use arrow_array::{Array, RecordBatch}; +use arrow_json::LineDelimitedWriter; +use arrow_schema::{Field, Schema}; +use criterion::{Criterion, Throughput, criterion_group, criterion_main}; +use std::sync::Arc; + +const ROWS: usize = 1 << 17; // 128K rows +const LIST_SHORT_ELEMENTS: usize = 5; +const LIST_LONG_ELEMENTS: usize = 100; + +fn build_list_batch(rows: usize, elements: usize) -> RecordBatch { + let mut list_builder = ListBuilder::new(Int64Builder::new()); + for row in 0..rows { + for i in 0..elements { + list_builder.values().append_value((row + i) as i64); + } + list_builder.append(true); + } + let list_array = list_builder.finish(); + + let schema = Arc::new(Schema::new(vec![Field::new( + "list", + list_array.data_type().clone(), + false, + )])); + + RecordBatch::try_new(schema, vec![Arc::new(list_array)]).unwrap() +} + +fn bench_write_list(c: &mut Criterion) { + let short_batch = build_list_batch(ROWS, LIST_SHORT_ELEMENTS); + let long_batch = build_list_batch(ROWS, LIST_LONG_ELEMENTS); + + let mut group = c.benchmark_group("write_list_i64"); + // Short lists: tests per-list overhead (few elements per row) + group.throughput(Throughput::Elements(ROWS as u64)); + group.bench_function("short", |b| { + let mut buf = Vec::with_capacity(ROWS * LIST_SHORT_ELEMENTS * 8); + b.iter(|| { + buf.clear(); + let mut writer = LineDelimitedWriter::new(&mut buf); + writer.write(&short_batch).unwrap(); + writer.finish().unwrap(); + }) + }); + + // Long lists: tests child element encode throughput (many elements per row) + group.bench_function("long", |b| { + let mut buf = Vec::with_capacity(ROWS * LIST_LONG_ELEMENTS * 8); + b.iter(|| { + buf.clear(); + let mut writer = LineDelimitedWriter::new(&mut buf); + writer.write(&long_batch).unwrap(); + writer.finish().unwrap(); + }) + }); + + group.finish(); +} + +fn build_fixed_size_list_batch(rows: usize, elements: usize) -> RecordBatch { + let mut builder = FixedSizeListBuilder::new(Int64Builder::new(), elements as i32); + for row in 0..rows { + for i in 0..elements { + builder.values().append_value((row + i) as i64); + } + builder.append(true); + } + let list_array = builder.finish(); + + let schema = Arc::new(Schema::new(vec![Field::new( + "fixed_size_list", + list_array.data_type().clone(), + false, + )])); + + RecordBatch::try_new(schema, vec![Arc::new(list_array)]).unwrap() +} + +fn bench_write_fixed_size_list(c: &mut Criterion) { + let short_batch = build_fixed_size_list_batch(ROWS, LIST_SHORT_ELEMENTS); + let long_batch = build_fixed_size_list_batch(ROWS, LIST_LONG_ELEMENTS); + + let mut group = c.benchmark_group("write_fixed_size_list_i64"); + group.throughput(Throughput::Elements(ROWS as u64)); + + group.bench_function("short", |b| { + let mut buf = Vec::with_capacity(ROWS * LIST_SHORT_ELEMENTS * 8); + b.iter(|| { + buf.clear(); + let mut writer = LineDelimitedWriter::new(&mut buf); + writer.write(&short_batch).unwrap(); + writer.finish().unwrap(); + }) + }); + + group.bench_function("long", |b| { + let mut buf = Vec::with_capacity(ROWS * LIST_LONG_ELEMENTS * 8); + b.iter(|| { + buf.clear(); + let mut writer = LineDelimitedWriter::new(&mut buf); + writer.write(&long_batch).unwrap(); + writer.finish().unwrap(); + }) + }); + + group.finish(); +} + +criterion_group!(benches, bench_write_list, bench_write_fixed_size_list); +criterion_main!(benches); diff --git a/arrow-json/src/lib.rs b/arrow-json/src/lib.rs index 1b18e0094708..201c3cd80749 100644 --- 
a/arrow-json/src/lib.rs
+++ b/arrow-json/src/lib.rs
@@ -179,17 +179,17 @@ impl JsonSerializable for f64 {
 
 #[cfg(test)]
 mod tests {
-    use std::sync::Arc;
-
-    use crate::writer::JsonArray;
-
     use super::*;
-
+    use crate::writer::JsonArray;
+    use crate::writer::LineDelimited;
     use arrow_array::{
-        ArrayRef, GenericBinaryArray, GenericByteViewArray, RecordBatch, RecordBatchWriter,
-        builder::FixedSizeBinaryBuilder, types::BinaryViewType,
+        ArrayRef, GenericBinaryArray, GenericByteViewArray, GenericListViewArray, OffsetSizeTrait,
+        RecordBatch, RecordBatchWriter, builder::FixedSizeBinaryBuilder, types::BinaryViewType,
     };
+    use arrow_schema::{DataType, Field, Fields, Schema};
     use serde_json::Value::{Bool, Number as VNumber, String as VString};
+    use std::io::Cursor;
+    use std::sync::Arc;
 
     #[test]
     fn test_arrow_native_type_to_json() {
@@ -216,13 +216,6 @@ mod tests {
 
     #[test]
     fn test_json_roundtrip_structs() {
-        use crate::writer::LineDelimited;
-        use arrow_schema::DataType;
-        use arrow_schema::Field;
-        use arrow_schema::Fields;
-        use arrow_schema::Schema;
-        use std::sync::Arc;
-
         let schema = Arc::new(Schema::new(vec![
             Field::new(
                 "c1",
@@ -352,4 +345,49 @@ mod tests {
 
         assert_eq!(batch, decoded);
     }
+
+    fn assert_list_view_roundtrip<O: OffsetSizeTrait>() {
+        let flat_field = Arc::new(Field::new("item", DataType::Int32, true));
+        let flat_dt = GenericListViewArray::<O>::DATA_TYPE_CONSTRUCTOR(flat_field);
+
+        let nested_inner = Arc::new(Field::new("item", DataType::Int32, false));
+        let nested_inner_dt = GenericListViewArray::<O>::DATA_TYPE_CONSTRUCTOR(nested_inner);
+        let nested_outer = Arc::new(Field::new("item", nested_inner_dt, true));
+        let nested_dt = GenericListViewArray::<O>::DATA_TYPE_CONSTRUCTOR(nested_outer);
+
+        let schema = Arc::new(Schema::new(vec![
+            Field::new("flat", flat_dt, true),
+            Field::new("nested", nested_dt, true),
+        ]));
+
+        let input = r#"{"flat":[1,2,3],"nested":[[1,2],[3]]}
+{"flat":[4,null]}
+{}
+{"flat":[6],"nested":[[4,5,6]]}
+{"flat":[]}
+"#
+        .as_bytes();
+
+        let batches: Vec<RecordBatch> = ReaderBuilder::new(schema.clone())
+            .with_batch_size(1024)
+            .build(Cursor::new(input))
+            .unwrap()
+            .collect::<Result<Vec<_>, _>>()
+            .unwrap();
+
+        let mut output = Vec::new();
+        let mut writer = WriterBuilder::new().build::<_, LineDelimited>(&mut output);
+        for batch in &batches {
+            writer.write(batch).unwrap();
+        }
+        writer.finish().unwrap();
+
+        assert_eq!(input, &output);
+    }
+
+    #[test]
+    fn test_json_roundtrip_list_view() {
+        assert_list_view_roundtrip::<i32>();
+        assert_list_view_roundtrip::<i64>();
+    }
 }
diff --git a/arrow-json/src/reader/list_array.rs b/arrow-json/src/reader/list_array.rs
index d363b6be9780..ea23403c4b18 100644
--- a/arrow-json/src/reader/list_array.rs
+++ b/arrow-json/src/reader/list_array.rs
@@ -18,28 +18,33 @@
 use crate::reader::tape::{Tape, TapeElement};
 use crate::reader::{ArrayDecoder, DecoderContext};
 use arrow_array::OffsetSizeTrait;
-use arrow_array::builder::{BooleanBufferBuilder, BufferBuilder};
-use arrow_buffer::buffer::NullBuffer;
+use arrow_array::builder::BooleanBufferBuilder;
+use arrow_buffer::{Buffer, buffer::NullBuffer};
 use arrow_data::{ArrayData, ArrayDataBuilder};
 use arrow_schema::{ArrowError, DataType};
 use std::marker::PhantomData;
 
-pub struct ListArrayDecoder<O> {
+pub type ListArrayDecoder<O> = ListLikeArrayDecoder<O, false>;
+pub type ListViewArrayDecoder<O> = ListLikeArrayDecoder<O, true>;
+
+pub struct ListLikeArrayDecoder<O, const IS_VIEW: bool> {
     data_type: DataType,
     decoder: Box<dyn ArrayDecoder>,
     phantom: PhantomData<O>,
    is_nullable: bool,
 }
 
-impl<O: OffsetSizeTrait> ListArrayDecoder<O> {
+impl<O: OffsetSizeTrait, const IS_VIEW: bool> ListLikeArrayDecoder<O, IS_VIEW> {
     pub fn new(
         ctx: &DecoderContext,
         data_type: &DataType,
         is_nullable: bool,
     ) -> Result<Self, ArrowError> {
-        let field = match data_type {
-            DataType::List(f) if !O::IS_LARGE => f,
-            DataType::LargeList(f) if O::IS_LARGE => f,
+        let field = match (IS_VIEW, data_type) {
+            (false, DataType::List(f)) if !O::IS_LARGE => f,
+            (false, DataType::LargeList(f)) if O::IS_LARGE => f,
+            (true, DataType::ListView(f)) if !O::IS_LARGE => f,
+            (true, DataType::LargeListView(f)) if O::IS_LARGE => f,
             _ => unreachable!(),
         };
         let decoder = ctx.make_decoder(field.data_type(), field.is_nullable())?;
@@ -53,11 +58,11 @@
     }
 }
 
-impl<O: OffsetSizeTrait> ArrayDecoder for ListArrayDecoder<O> {
+impl<O: OffsetSizeTrait, const IS_VIEW: bool> ArrayDecoder for ListLikeArrayDecoder<O, IS_VIEW> {
     fn decode(&mut self, tape: &Tape<'_>, pos: &[u32]) -> Result<ArrayData, ArrowError> {
         let mut child_pos = Vec::with_capacity(pos.len());
-        let mut offsets = BufferBuilder::<O>::new(pos.len() + 1);
-        offsets.append(O::from_usize(0).unwrap());
+        let mut offsets = Vec::with_capacity(pos.len() + 1);
+        offsets.push(O::from_usize(0).unwrap());
 
         let mut nulls = self
             .is_nullable
@@ -88,18 +93,30 @@ impl<O: OffsetSizeTrait> ArrayDecoder for ListArrayDecoder<O> {
             let offset = O::from_usize(child_pos.len()).ok_or_else(|| {
                 ArrowError::JsonError(format!("offset overflow decoding {}", self.data_type))
             })?;
-            offsets.append(offset)
+            offsets.push(offset);
         }
 
         let child_data = self.decoder.decode(tape, &child_pos)?;
         let nulls = nulls.as_mut().map(|x| NullBuffer::new(x.finish()));
 
-        let data = ArrayDataBuilder::new(self.data_type.clone())
+        let mut data = ArrayDataBuilder::new(self.data_type.clone())
             .len(pos.len())
             .nulls(nulls)
-            .add_buffer(offsets.finish())
             .child_data(vec![child_data]);
 
+        if IS_VIEW {
+            let mut sizes = Vec::with_capacity(offsets.len() - 1);
+            for i in 1..offsets.len() {
+                sizes.push(offsets[i] - offsets[i - 1]);
+            }
+            offsets.pop();
+            data = data
+                .add_buffer(Buffer::from_vec(offsets))
+                .add_buffer(Buffer::from_vec(sizes));
+        } else {
+            data = data.add_buffer(Buffer::from_vec(offsets));
+        }
+
         // Safety
         // Validated lengths above
         Ok(unsafe { data.build_unchecked() })
diff --git a/arrow-json/src/reader/mod.rs b/arrow-json/src/reader/mod.rs
index 786cf9212d04..7039d3500ece 100644
--- a/arrow-json/src/reader/mod.rs
+++ b/arrow-json/src/reader/mod.rs
@@ -150,10 +150,11 @@ use arrow_array::{RecordBatch, RecordBatchReader, StructArray, downcast_integer,
 use arrow_data::ArrayData;
 use arrow_schema::{ArrowError, DataType, FieldRef, Schema, SchemaRef, TimeUnit};
 pub use schema::*;
+pub use value_iter::ValueIter;
 
 use crate::reader::boolean_array::BooleanArrayDecoder;
 use crate::reader::decimal_array::DecimalArrayDecoder;
-use crate::reader::list_array::ListArrayDecoder;
+use crate::reader::list_array::{ListArrayDecoder, ListViewArrayDecoder};
 use crate::reader::map_array::MapArrayDecoder;
 use crate::reader::null_array::NullArrayDecoder;
 use crate::reader::primitive_array::PrimitiveArrayDecoder;
@@ -179,6 +180,7 @@ mod string_view_array;
 mod struct_array;
 mod tape;
 mod timestamp_array;
+mod value_iter;
 
 /// A builder for [`Reader`] and [`Decoder`]
 pub struct ReaderBuilder {
@@ -790,6 +792,8 @@ fn make_decoder(
     DataType::LargeUtf8 => Ok(Box::new(StringArrayDecoder::<i64>::new(coerce_primitive))),
     DataType::List(_) => Ok(Box::new(ListArrayDecoder::<i32>::new(ctx, data_type, is_nullable)?)),
     DataType::LargeList(_) => Ok(Box::new(ListArrayDecoder::<i64>::new(ctx, data_type, is_nullable)?)),
+    DataType::ListView(_) => Ok(Box::new(ListViewArrayDecoder::<i32>::new(ctx, data_type, is_nullable)?)),
+    DataType::LargeListView(_) => Ok(Box::new(ListViewArrayDecoder::<i64>::new(ctx, data_type, is_nullable)?)),
     DataType::Struct(_) => Ok(Box::new(StructArrayDecoder::new(ctx, data_type, is_nullable)?)),
     DataType::Binary => Ok(Box::new(BinaryArrayDecoder::<i32>::default())),
     DataType::LargeBinary => Ok(Box::new(BinaryArrayDecoder::<i64>::default())),
@@ -813,7 +817,10 @@ mod tests {
     use std::io::{BufReader, Cursor, Seek};
 
     use arrow_array::cast::AsArray;
-    use arrow_array::{Array, BooleanArray, Float64Array, ListArray, StringArray, StringViewArray};
+    use arrow_array::{
+        Array, BooleanArray, Float64Array, GenericListViewArray, ListArray, OffsetSizeTrait,
+        StringArray, StringViewArray,
+    };
     use arrow_buffer::{ArrowNativeType, Buffer};
     use arrow_cast::display::{ArrayFormatter, FormatOptions};
     use arrow_data::ArrayDataBuilder;
@@ -2190,6 +2197,77 @@ mod tests {
         assert_eq!(read, expected);
     }
 
+    fn assert_read_list_view<O: OffsetSizeTrait>() {
+        let field = Arc::new(Field::new("item", DataType::Int32, true));
+        let data_type = GenericListViewArray::<O>::DATA_TYPE_CONSTRUCTOR(field.clone());
+        let schema = Arc::new(Schema::new(vec![Field::new("lv", data_type, true)]));
+
+        let buf = r#"
+        {"lv": [1, 2, 3]}
+        {"lv": [4, null]}
+        {"lv": null}
+        {"lv": [6]}
+        {"lv": []}
+        "#;
+
+        let batches = do_read(buf, 1024, false, false, schema);
+        assert_eq!(batches.len(), 1);
+        let batch = &batches[0];
+        let col = batch.column(0);
+        let list_view = col
+            .as_any()
+            .downcast_ref::<GenericListViewArray<O>>()
+            .unwrap();
+
+        assert_eq!(list_view.len(), 5);
+
+        // Check offsets and sizes
+        let expected_offsets: Vec<O> = vec![0, 3, 5, 5, 6]
+            .into_iter()
+            .map(|v| O::usize_as(v))
+            .collect();
+        let expected_sizes: Vec<O> = vec![3, 2, 0, 1, 0]
+            .into_iter()
+            .map(|v| O::usize_as(v))
+            .collect();
+        assert_eq!(list_view.value_offsets(), &expected_offsets);
+        assert_eq!(list_view.value_sizes(), &expected_sizes);
+
+        // Row 0: [1, 2, 3]
+        assert!(list_view.is_valid(0));
+        let vals = list_view.value(0);
+        let ints = vals.as_primitive::<Int32Type>();
+        assert_eq!(ints.values(), &[1, 2, 3]);
+
+        // Row 1: [4, null]
+        assert!(list_view.is_valid(1));
+        let vals = list_view.value(1);
+        let ints = vals.as_primitive::<Int32Type>();
+        assert_eq!(ints.len(), 2);
+        assert_eq!(ints.value(0), 4);
+        assert!(ints.is_null(1));
+
+        // Row 2: null
+        assert!(list_view.is_null(2));
+
+        // Row 3: [6]
+        assert!(list_view.is_valid(3));
+        let vals = list_view.value(3);
+        let ints = vals.as_primitive::<Int32Type>();
+        assert_eq!(ints.values(), &[6]);
+
+        // Row 4: []
+        assert!(list_view.is_valid(4));
+        let vals = list_view.value(4);
+        assert_eq!(vals.len(), 0);
+    }
+
+    #[test]
+    fn test_read_list_view() {
+        assert_read_list_view::<i32>();
+        assert_read_list_view::<i64>();
+    }
+
     #[test]
     fn test_skip_empty_lines() {
         let schema = Schema::new(vec![Field::new("a", DataType::Int64, true)]);
diff --git a/arrow-json/src/reader/schema.rs b/arrow-json/src/reader/schema.rs
index fb7d93a85e12..524e6b2aa560 100644
--- a/arrow-json/src/reader/schema.rs
+++ b/arrow-json/src/reader/schema.rs
@@ -15,6 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
+use super::ValueIter; use arrow_schema::{ArrowError, DataType, Field, Fields, Schema}; use indexmap::map::IndexMap as HashMap; use indexmap::set::IndexSet as HashSet; @@ -127,83 +128,6 @@ fn generate_schema(spec: HashMap) -> Result { - reader: R, - max_read_records: Option, - record_count: usize, - // reuse line buffer to avoid allocation on each record - line_buf: String, -} - -impl ValueIter { - /// Creates a new `ValueIter` - pub fn new(reader: R, max_read_records: Option) -> Self { - Self { - reader, - max_read_records, - record_count: 0, - line_buf: String::new(), - } - } -} - -impl Iterator for ValueIter { - type Item = Result; - - fn next(&mut self) -> Option { - if let Some(max) = self.max_read_records { - if self.record_count >= max { - return None; - } - } - - loop { - self.line_buf.truncate(0); - match self.reader.read_line(&mut self.line_buf) { - Ok(0) => { - // read_line returns 0 when stream reached EOF - return None; - } - Err(e) => { - return Some(Err(ArrowError::JsonError(format!( - "Failed to read JSON record: {e}" - )))); - } - _ => { - let trimmed_s = self.line_buf.trim(); - if trimmed_s.is_empty() { - // ignore empty lines - continue; - } - - self.record_count += 1; - return Some( - serde_json::from_str(trimmed_s) - .map_err(|e| ArrowError::JsonError(format!("Not valid JSON: {e}"))), - ); - } - } - } - } -} - /// Infer the fields of a JSON file by reading the first n records of the file, with /// `max_read_records` controlling the maximum number of records to read. /// @@ -282,7 +206,7 @@ pub fn infer_json_schema( ) -> Result<(Schema, usize), ArrowError> { let mut values = ValueIter::new(reader, max_read_records); let schema = infer_json_schema_from_iterator(&mut values)?; - Ok((schema, values.record_count)) + Ok((schema, values.record_count())) } fn set_object_scalar_field_type( diff --git a/arrow-json/src/reader/value_iter.rs b/arrow-json/src/reader/value_iter.rs new file mode 100644 index 000000000000..ebaba695adf3 --- /dev/null +++ b/arrow-json/src/reader/value_iter.rs @@ -0,0 +1,103 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
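
Since `infer_json_schema` now reads through the extracted `ValueIter` and reports consumption via the new `record_count()` accessor, its two-value return is unchanged for callers. A small sketch of the call-site behavior (hedged; the `(Schema, usize)` return shape is taken from the diff above):

```rust
use std::io::{BufReader, Cursor};

use arrow_json::reader::infer_json_schema;

fn main() {
    let json = "{\"a\": 1}\n{\"a\": 2, \"b\": \"x\"}\n";
    let mut reader = BufReader::new(Cursor::new(json));

    // Inspect at most 100 records; the second return value is how many were
    // actually consumed, now surfaced through ValueIter::record_count().
    let (schema, n_records) = infer_json_schema(&mut reader, Some(100)).unwrap();
    assert_eq!(n_records, 2);
    assert_eq!(schema.fields().len(), 2); // "a" and "b"
}
```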
+
+use std::io::BufRead;
+
+use arrow_schema::ArrowError;
+use serde_json::Value;
+
+/// JSON file reader that produces a serde_json::Value iterator from a Read trait
+///
+/// # Example
+///
+/// ```
+/// use std::fs::File;
+/// use std::io::BufReader;
+/// use arrow_json::reader::ValueIter;
+///
+/// let mut reader =
+///     BufReader::new(File::open("test/data/mixed_arrays.json").unwrap());
+/// let mut value_reader = ValueIter::new(&mut reader, None);
+/// for value in value_reader {
+///     println!("JSON value: {}", value.unwrap());
+/// }
+/// ```
+#[derive(Debug)]
+pub struct ValueIter<R: BufRead> {
+    reader: R,
+    max_read_records: Option<usize>,
+    record_count: usize,
+    // reuse line buffer to avoid allocation on each record
+    line_buf: String,
+}
+
+impl<R: BufRead> ValueIter<R> {
+    /// Creates a new `ValueIter`
+    pub fn new(reader: R, max_read_records: Option<usize>) -> Self {
+        Self {
+            reader,
+            max_read_records,
+            record_count: 0,
+            line_buf: String::new(),
+        }
+    }
+
+    /// Returns the number of records this iterator has consumed
+    pub fn record_count(&self) -> usize {
+        self.record_count
+    }
+}
+
+impl<R: BufRead> Iterator for ValueIter<R> {
+    type Item = Result<Value, ArrowError>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if let Some(max) = self.max_read_records {
+            if self.record_count >= max {
+                return None;
+            }
+        }
+
+        loop {
+            self.line_buf.clear();
+            match self.reader.read_line(&mut self.line_buf) {
+                Ok(0) => {
+                    // read_line returns 0 when stream reached EOF
+                    return None;
+                }
+                Err(e) => {
+                    return Some(Err(ArrowError::JsonError(format!(
+                        "Failed to read JSON record: {e}"
+                    ))));
+                }
+                _ => {
+                    let trimmed_s = self.line_buf.trim();
+                    if trimmed_s.is_empty() {
+                        // ignore empty lines
+                        continue;
+                    }
+
+                    self.record_count += 1;
+                    return Some(
+                        serde_json::from_str(trimmed_s)
+                            .map_err(|e| ArrowError::JsonError(format!("Not valid JSON: {e}"))),
+                    );
+                }
+            }
+        }
+    }
+}
diff --git a/arrow-json/src/writer/encoder.rs b/arrow-json/src/writer/encoder.rs
index d7c3fbbe2e34..45055c5a36a8 100644
--- a/arrow-json/src/writer/encoder.rs
+++ b/arrow-json/src/writer/encoder.rs
@@ -352,15 +352,23 @@ pub fn make_encoder<'a>(
         }
         DataType::List(_) => {
             let array = array.as_list::<i32>();
-            NullableEncoder::new(Box::new(ListEncoder::try_new(field, array, options)?), array.nulls().cloned())
+            NullableEncoder::new(Box::new(ListLikeEncoder::try_new(field, array, options)?), array.nulls().cloned())
         }
         DataType::LargeList(_) => {
             let array = array.as_list::<i64>();
-            NullableEncoder::new(Box::new(ListEncoder::try_new(field, array, options)?), array.nulls().cloned())
+            NullableEncoder::new(Box::new(ListLikeEncoder::try_new(field, array, options)?), array.nulls().cloned())
+        }
+        DataType::ListView(_) => {
+            let array = array.as_list_view::<i32>();
+            NullableEncoder::new(Box::new(ListLikeEncoder::try_new(field, array, options)?), array.nulls().cloned())
+        }
+        DataType::LargeListView(_) => {
+            let array = array.as_list_view::<i64>();
+            NullableEncoder::new(Box::new(ListLikeEncoder::try_new(field, array, options)?), array.nulls().cloned())
         }
         DataType::FixedSizeList(_, _) => {
             let array = array.as_fixed_size_list();
-            NullableEncoder::new(Box::new(FixedSizeListEncoder::try_new(field, array, options)?), array.nulls().cloned())
+            NullableEncoder::new(Box::new(ListLikeEncoder::try_new(field, array, options)?), array.nulls().cloned())
         }
         DataType::Dictionary(_, _) => downcast_dictionary_array!
{ @@ -639,77 +647,30 @@ impl Encoder for BinaryViewEncoder<'_> { } } -struct ListEncoder<'a, O: OffsetSizeTrait> { - offsets: OffsetBuffer, - encoder: NullableEncoder<'a>, -} - -impl<'a, O: OffsetSizeTrait> ListEncoder<'a, O> { - fn try_new( - field: &'a FieldRef, - array: &'a GenericListArray, - options: &'a EncoderOptions, - ) -> Result { - let encoder = make_encoder(field, array.values().as_ref(), options)?; - Ok(Self { - offsets: array.offsets().clone(), - encoder, - }) - } -} - -impl Encoder for ListEncoder<'_, O> { - fn encode(&mut self, idx: usize, out: &mut Vec) { - let end = self.offsets[idx + 1].as_usize(); - let start = self.offsets[idx].as_usize(); - out.push(b'['); - - if self.encoder.has_nulls() { - for idx in start..end { - if idx != start { - out.push(b',') - } - if self.encoder.is_null(idx) { - out.extend_from_slice(b"null"); - } else { - self.encoder.encode(idx, out); - } - } - } else { - for idx in start..end { - if idx != start { - out.push(b',') - } - self.encoder.encode(idx, out); - } - } - out.push(b']'); - } -} - -struct FixedSizeListEncoder<'a> { - value_length: usize, +struct ListLikeEncoder<'a, L: ListLikeArray> { + list_array: &'a L, encoder: NullableEncoder<'a>, } -impl<'a> FixedSizeListEncoder<'a> { +impl<'a, L: ListLikeArray> ListLikeEncoder<'a, L> { fn try_new( field: &'a FieldRef, - array: &'a FixedSizeListArray, + array: &'a L, options: &'a EncoderOptions, ) -> Result { let encoder = make_encoder(field, array.values().as_ref(), options)?; Ok(Self { + list_array: array, encoder, - value_length: array.value_length().as_usize(), }) } } -impl Encoder for FixedSizeListEncoder<'_> { +impl Encoder for ListLikeEncoder<'_, L> { fn encode(&mut self, idx: usize, out: &mut Vec) { - let start = idx * self.value_length; - let end = start + self.value_length; + let range = self.list_array.element_range(idx); + let start = range.start; + let end = range.end; out.push(b'['); if self.encoder.has_nulls() { for idx in start..end { diff --git a/arrow-json/src/writer/mod.rs b/arrow-json/src/writer/mod.rs index 2fac5ab62353..04cc8c9e2a93 100644 --- a/arrow-json/src/writer/mod.rs +++ b/arrow-json/src/writer/mod.rs @@ -1241,6 +1241,54 @@ mod tests { ); } + fn assert_write_list_view() { + let field = Arc::new(Field::new("item", DataType::Int32, true)); + let data_type = GenericListViewArray::::DATA_TYPE_CONSTRUCTOR(field.clone()); + let schema = Schema::new(vec![Field::new("lv", data_type, true)]); + + // rows: [1, 2, 3], [4, null], null, [6] + let values = Int32Array::from(vec![Some(1), Some(2), Some(3), Some(4), None, Some(6)]); + let offsets = [0, 3, 0, 5] + .iter() + .map(|&v| O::from_usize(v).unwrap()) + .collect::>(); + let sizes = [3, 2, 0, 1] + .iter() + .map(|&v| O::from_usize(v).unwrap()) + .collect::>(); + let list_view = GenericListViewArray::::try_new( + field, + ScalarBuffer::from(offsets), + ScalarBuffer::from(sizes), + Arc::new(values), + Some(NullBuffer::from_iter([true, true, false, true])), + ) + .unwrap(); + + let batch = RecordBatch::try_new(Arc::new(schema), vec![Arc::new(list_view)]).unwrap(); + + let mut buf = Vec::new(); + { + let mut writer = LineDelimitedWriter::new(&mut buf); + writer.write_batches(&[&batch]).unwrap(); + } + + assert_json_eq( + &buf, + r#"{"lv":[1,2,3]} +{"lv":[4,null]} +{} +{"lv":[6]} +"#, + ); + } + + #[test] + fn write_list_view() { + assert_write_list_view::(); + assert_write_list_view::(); + } + fn test_write_for_file(test_file: &str, remove_nulls: bool) { let file = File::open(test_file).unwrap(); let mut reader = 
BufReader::new(file); diff --git a/arrow-pyarrow/src/lib.rs b/arrow-pyarrow/src/lib.rs index 15951f8dcfbf..95f1d38fddf3 100644 --- a/arrow-pyarrow/src/lib.rs +++ b/arrow-pyarrow/src/lib.rs @@ -61,7 +61,6 @@ use std::convert::{From, TryFrom}; use std::ffi::CStr; -use std::ptr::{addr_of, addr_of_mut}; use std::sync::Arc; use arrow_array::ffi; @@ -75,10 +74,10 @@ use arrow_data::ArrayData; use arrow_schema::{ArrowError, DataType, Field, Schema, SchemaRef}; use pyo3::exceptions::{PyTypeError, PyValueError}; use pyo3::ffi::Py_uintptr_t; +use pyo3::import_exception; use pyo3::prelude::*; -use pyo3::pybacked::PyBackedStr; -use pyo3::types::{PyCapsule, PyDict, PyList, PyTuple}; -use pyo3::{import_exception, intern}; +use pyo3::sync::PyOnceLock; +use pyo3::types::{PyCapsule, PyDict, PyList, PyTuple, PyType}; import_exception!(pyarrow, ArrowException); /// Represents an exception raised by PyArrow. @@ -118,17 +117,13 @@ impl IntoPyArrow for T { } } -fn validate_class(expected: &str, value: &Bound) -> PyResult<()> { - let pyarrow = PyModule::import(value.py(), "pyarrow")?; - let class = pyarrow.getattr(expected)?; - if !value.is_instance(&class)? { - let expected_module = class.getattr("__module__")?.extract::()?; - let expected_name = class.getattr("__name__")?.extract::()?; +fn validate_class(expected: &Bound, value: &Bound) -> PyResult<()> { + if !value.is_instance(expected)? { + let expected_module = expected.getattr("__module__")?; + let expected_name = expected.getattr("__name__")?; let found_class = value.get_type(); - let found_module = found_class - .getattr("__module__")? - .extract::()?; - let found_name = found_class.getattr("__name__")?.extract::()?; + let found_module = found_class.getattr("__module__")?; + let found_name = found_class.getattr("__name__")?; return Err(PyTypeError::new_err(format!( "Expected instance of {expected_module}.{expected_name}, got {found_module}.{found_name}", ))); @@ -160,37 +155,27 @@ impl FromPyArrow for DataType { // method, so prefer it over _export_to_c. // See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html if value.hasattr("__arrow_c_schema__")? { - let capsule = value.getattr("__arrow_c_schema__")?.call0()?; - let capsule = capsule.cast::()?; - validate_pycapsule(capsule, "arrow_schema")?; + let capsule = value.call_method0("__arrow_c_schema__")?.extract()?; + validate_pycapsule(&capsule, "arrow_schema")?; let schema_ptr = capsule .pointer_checked(Some(ARROW_SCHEMA_CAPSULE_NAME))? 
.cast::(); - unsafe { - let dtype = DataType::try_from(schema_ptr.as_ref()).map_err(to_py_err)?; - return Ok(dtype); - } + return unsafe { DataType::try_from(schema_ptr.as_ref()) }.map_err(to_py_err); } - validate_class("DataType", value)?; + validate_class(data_type_class(value.py())?, value)?; - let c_schema = FFI_ArrowSchema::empty(); - let c_schema_ptr = &c_schema as *const FFI_ArrowSchema; - value.call_method1("_export_to_c", (c_schema_ptr as Py_uintptr_t,))?; - let dtype = DataType::try_from(&c_schema).map_err(to_py_err)?; - Ok(dtype) + let mut c_schema = FFI_ArrowSchema::empty(); + value.call_method1("_export_to_c", (&raw mut c_schema as Py_uintptr_t,))?; + DataType::try_from(&c_schema).map_err(to_py_err) } } impl ToPyArrow for DataType { fn to_pyarrow<'py>(&self, py: Python<'py>) -> PyResult> { let c_schema = FFI_ArrowSchema::try_from(self).map_err(to_py_err)?; - let c_schema_ptr = &c_schema as *const FFI_ArrowSchema; - let module = py.import("pyarrow")?; - let class = module.getattr("DataType")?; - let dtype = class.call_method1("_import_from_c", (c_schema_ptr as Py_uintptr_t,))?; - Ok(dtype) + data_type_class(py)?.call_method1("_import_from_c", (&raw const c_schema as Py_uintptr_t,)) } } @@ -200,37 +185,27 @@ impl FromPyArrow for Field { // method, so prefer it over _export_to_c. // See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html if value.hasattr("__arrow_c_schema__")? { - let capsule = value.getattr("__arrow_c_schema__")?.call0()?; - let capsule = capsule.cast::()?; - validate_pycapsule(capsule, "arrow_schema")?; + let capsule = value.call_method0("__arrow_c_schema__")?.extract()?; + validate_pycapsule(&capsule, "arrow_schema")?; let schema_ptr = capsule .pointer_checked(Some(ARROW_SCHEMA_CAPSULE_NAME))? .cast::(); - unsafe { - let field = Field::try_from(schema_ptr.as_ref()).map_err(to_py_err)?; - return Ok(field); - } + return unsafe { Field::try_from(schema_ptr.as_ref()) }.map_err(to_py_err); } - validate_class("Field", value)?; + validate_class(field_class(value.py())?, value)?; - let c_schema = FFI_ArrowSchema::empty(); - let c_schema_ptr = &c_schema as *const FFI_ArrowSchema; - value.call_method1("_export_to_c", (c_schema_ptr as Py_uintptr_t,))?; - let field = Field::try_from(&c_schema).map_err(to_py_err)?; - Ok(field) + let mut c_schema = FFI_ArrowSchema::empty(); + value.call_method1("_export_to_c", (&raw mut c_schema as Py_uintptr_t,))?; + Field::try_from(&c_schema).map_err(to_py_err) } } impl ToPyArrow for Field { fn to_pyarrow<'py>(&self, py: Python<'py>) -> PyResult> { let c_schema = FFI_ArrowSchema::try_from(self).map_err(to_py_err)?; - let c_schema_ptr = &c_schema as *const FFI_ArrowSchema; - let module = py.import("pyarrow")?; - let class = module.getattr("Field")?; - let dtype = class.call_method1("_import_from_c", (c_schema_ptr as Py_uintptr_t,))?; - Ok(dtype) + field_class(py)?.call_method1("_import_from_c", (&raw const c_schema as Py_uintptr_t,)) } } @@ -240,37 +215,27 @@ impl FromPyArrow for Schema { // method, so prefer it over _export_to_c. // See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html if value.hasattr("__arrow_c_schema__")? { - let capsule = value.getattr("__arrow_c_schema__")?.call0()?; - let capsule = capsule.cast::()?; - validate_pycapsule(capsule, "arrow_schema")?; + let capsule = value.call_method0("__arrow_c_schema__")?.extract()?; + validate_pycapsule(&capsule, "arrow_schema")?; let schema_ptr = capsule .pointer_checked(Some(ARROW_SCHEMA_CAPSULE_NAME))? 
.cast::(); - unsafe { - let schema = Schema::try_from(schema_ptr.as_ref()).map_err(to_py_err)?; - return Ok(schema); - } + return unsafe { Schema::try_from(schema_ptr.as_ref()) }.map_err(to_py_err); } - validate_class("Schema", value)?; + validate_class(schema_class(value.py())?, value)?; - let c_schema = FFI_ArrowSchema::empty(); - let c_schema_ptr = &c_schema as *const FFI_ArrowSchema; - value.call_method1("_export_to_c", (c_schema_ptr as Py_uintptr_t,))?; - let schema = Schema::try_from(&c_schema).map_err(to_py_err)?; - Ok(schema) + let mut c_schema = FFI_ArrowSchema::empty(); + value.call_method1("_export_to_c", (&raw mut c_schema as Py_uintptr_t,))?; + Schema::try_from(&c_schema).map_err(to_py_err) } } impl ToPyArrow for Schema { fn to_pyarrow<'py>(&self, py: Python<'py>) -> PyResult> { let c_schema = FFI_ArrowSchema::try_from(self).map_err(to_py_err)?; - let c_schema_ptr = &c_schema as *const FFI_ArrowSchema; - let module = py.import("pyarrow")?; - let class = module.getattr("Schema")?; - let schema = class.call_method1("_import_from_c", (c_schema_ptr as Py_uintptr_t,))?; - Ok(schema) + schema_class(py)?.call_method1("_import_from_c", (&raw const c_schema as Py_uintptr_t,)) } } @@ -280,21 +245,11 @@ impl FromPyArrow for ArrayData { // method, so prefer it over _export_to_c. // See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html if value.hasattr("__arrow_c_array__")? { - let tuple = value.getattr("__arrow_c_array__")?.call0()?; + let (schema_capsule, array_capsule) = + value.call_method0("__arrow_c_array__")?.extract()?; - if !tuple.is_instance_of::() { - return Err(PyTypeError::new_err( - "Expected __arrow_c_array__ to return a tuple.", - )); - } - - let schema_capsule = tuple.get_item(0)?; - let schema_capsule = schema_capsule.cast::()?; - let array_capsule = tuple.get_item(1)?; - let array_capsule = array_capsule.cast::()?; - - validate_pycapsule(schema_capsule, "arrow_schema")?; - validate_pycapsule(array_capsule, "arrow_array")?; + validate_pycapsule(&schema_capsule, "arrow_schema")?; + validate_pycapsule(&array_capsule, "arrow_array")?; let schema_ptr = schema_capsule .pointer_checked(Some(ARROW_SCHEMA_CAPSULE_NAME))? @@ -310,7 +265,7 @@ impl FromPyArrow for ArrayData { return unsafe { ffi::from_ffi(array, schema_ptr.as_ref()) }.map_err(to_py_err); } - validate_class("Array", value)?; + validate_class(array_class(value.py())?, value)?; // prepare a pointer to receive the Array struct let mut array = FFI_ArrowArray::empty(); @@ -322,8 +277,8 @@ impl FromPyArrow for ArrayData { value.call_method1( "_export_to_c", ( - addr_of_mut!(array) as Py_uintptr_t, - addr_of_mut!(schema) as Py_uintptr_t, + &raw mut array as Py_uintptr_t, + &raw mut schema as Py_uintptr_t, ), )?; @@ -335,17 +290,13 @@ impl ToPyArrow for ArrayData { fn to_pyarrow<'py>(&self, py: Python<'py>) -> PyResult> { let array = FFI_ArrowArray::new(self); let schema = FFI_ArrowSchema::try_from(self.data_type()).map_err(to_py_err)?; - - let module = py.import("pyarrow")?; - let class = module.getattr("Array")?; - let array = class.call_method1( + array_class(py)?.call_method1( "_import_from_c", ( - addr_of!(array) as Py_uintptr_t, - addr_of!(schema) as Py_uintptr_t, + &raw const array as Py_uintptr_t, + &raw const schema as Py_uintptr_t, ), - )?; - Ok(array) + ) } } @@ -373,21 +324,11 @@ impl FromPyArrow for RecordBatch { // See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html if value.hasattr("__arrow_c_array__")? 
{ - let tuple = value.getattr("__arrow_c_array__")?.call0()?; - - if !tuple.is_instance_of::() { - return Err(PyTypeError::new_err( - "Expected __arrow_c_array__ to return a tuple.", - )); - } - - let schema_capsule = tuple.get_item(0)?; - let schema_capsule = schema_capsule.cast::()?; - let array_capsule = tuple.get_item(1)?; - let array_capsule = array_capsule.cast::()?; + let (schema_capsule, array_capsule) = + value.call_method0("__arrow_c_array__")?.extract()?; - validate_pycapsule(schema_capsule, "arrow_schema")?; - validate_pycapsule(array_capsule, "arrow_array")?; + validate_pycapsule(&schema_capsule, "arrow_schema")?; + validate_pycapsule(&array_capsule, "arrow_array")?; let schema_ptr = schema_capsule .pointer_checked(Some(ARROW_SCHEMA_CAPSULE_NAME))? @@ -423,7 +364,7 @@ impl FromPyArrow for RecordBatch { return RecordBatch::try_new_with_options(schema, columns, &options).map_err(to_py_err); } - validate_class("RecordBatch", value)?; + validate_class(record_batch_class(value.py())?, value)?; // TODO(kszucs): implement the FFI conversions in arrow-rs for RecordBatches let schema = value.getattr("schema")?; let schema = Arc::new(Schema::from_pyarrow_bound(&schema)?); @@ -464,9 +405,9 @@ impl FromPyArrow for ArrowArrayStreamReader { // method, so prefer it over _export_to_c. // See https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html if value.hasattr("__arrow_c_stream__")? { - let capsule = value.getattr("__arrow_c_stream__")?.call0()?; - let capsule = capsule.cast::()?; - validate_pycapsule(capsule, "arrow_array_stream")?; + let capsule = value.call_method0("__arrow_c_stream__")?.extract()?; + + validate_pycapsule(&capsule, "arrow_array_stream")?; let stream = unsafe { FFI_ArrowArrayStream::from_raw( @@ -483,22 +424,19 @@ impl FromPyArrow for ArrowArrayStreamReader { return Ok(stream_reader); } - validate_class("RecordBatchReader", value)?; + validate_class(record_batch_reader_class(value.py())?, value)?; - // prepare a pointer to receive the stream struct + // prepare the stream struct to receive the content let mut stream = FFI_ArrowArrayStream::empty(); - let stream_ptr = &mut stream as *mut FFI_ArrowArrayStream; // make the conversion through PyArrow's private API // this changes the pointer's memory and is thus unsafe. // In particular, `_export_to_c` can go out of bounds - let args = PyTuple::new(value.py(), [stream_ptr as Py_uintptr_t])?; + let args = PyTuple::new(value.py(), [&raw mut stream as Py_uintptr_t])?; value.call_method1("_export_to_c", args)?; - let stream_reader = ArrowArrayStreamReader::try_new(stream) - .map_err(|err| PyValueError::new_err(err.to_string()))?; - - Ok(stream_reader) + ArrowArrayStreamReader::try_new(stream) + .map_err(|err| PyValueError::new_err(err.to_string())) } } @@ -507,15 +445,9 @@ impl IntoPyArrow for Box { // We can't implement `ToPyArrow` for `T: RecordBatchReader + Send` because // there is already a blanket implementation for `T: ToPyArrow`. fn into_pyarrow<'py>(self, py: Python<'py>) -> PyResult> { - let mut stream = FFI_ArrowArrayStream::new(self); - - let stream_ptr = (&mut stream) as *mut FFI_ArrowArrayStream; - let module = py.import("pyarrow")?; - let class = module.getattr("RecordBatchReader")?; - let args = PyTuple::new(py, [stream_ptr as Py_uintptr_t])?; - let reader = class.call_method1("_import_from_c", args)?; - - Ok(reader) + let stream = FFI_ArrowArrayStream::new(self); + record_batch_reader_class(py)? 
+ .call_method1("_import_from_c", (&raw const stream as Py_uintptr_t,)) } } @@ -599,28 +531,57 @@ impl FromPyArrow for Table { fn from_pyarrow_bound(ob: &Bound) -> PyResult { let reader: Box = Box::new(ArrowArrayStreamReader::from_pyarrow_bound(ob)?); - Self::try_from(reader).map_err(|err| PyErr::new::(err.to_string())) + Self::try_from(reader).map_err(|err| PyValueError::new_err(err.to_string())) } } /// Convert a [`Table`] into `pyarrow.Table`. impl IntoPyArrow for Table { fn into_pyarrow(self, py: Python) -> PyResult> { - let module = py.import(intern!(py, "pyarrow"))?; - let class = module.getattr(intern!(py, "Table"))?; - let py_batches = PyList::new(py, self.record_batches.into_iter().map(PyArrowType))?; let py_schema = PyArrowType(Arc::unwrap_or_clone(self.schema)); let kwargs = PyDict::new(py); kwargs.set_item("schema", py_schema)?; - let reader = class.call_method("from_batches", (py_batches,), Some(&kwargs))?; - - Ok(reader) + table_class(py)?.call_method("from_batches", (py_batches,), Some(&kwargs)) } } +fn array_class(py: Python<'_>) -> PyResult<&Bound<'_, PyType>> { + static TYPE: PyOnceLock> = PyOnceLock::new(); + TYPE.import(py, "pyarrow", "Array") +} + +fn record_batch_class(py: Python<'_>) -> PyResult<&Bound<'_, PyType>> { + static TYPE: PyOnceLock> = PyOnceLock::new(); + TYPE.import(py, "pyarrow", "RecordBatch") +} + +fn record_batch_reader_class(py: Python<'_>) -> PyResult<&Bound<'_, PyType>> { + static TYPE: PyOnceLock> = PyOnceLock::new(); + TYPE.import(py, "pyarrow", "RecordBatchReader") +} +fn data_type_class(py: Python<'_>) -> PyResult<&Bound<'_, PyType>> { + static TYPE: PyOnceLock> = PyOnceLock::new(); + TYPE.import(py, "pyarrow", "DataType") +} + +fn field_class(py: Python<'_>) -> PyResult<&Bound<'_, PyType>> { + static TYPE: PyOnceLock> = PyOnceLock::new(); + TYPE.import(py, "pyarrow", "Field") +} + +fn schema_class(py: Python<'_>) -> PyResult<&Bound<'_, PyType>> { + static TYPE: PyOnceLock> = PyOnceLock::new(); + TYPE.import(py, "pyarrow", "Schema") +} + +fn table_class(py: Python<'_>) -> PyResult<&Bound<'_, PyType>> { + static TYPE: PyOnceLock> = PyOnceLock::new(); + TYPE.import(py, "pyarrow", "Table") +} + /// A newtype wrapper for types implementing [`FromPyArrow`] or [`IntoPyArrow`]. 
/// /// When wrapped around a type `T: FromPyArrow`, it @@ -644,7 +605,7 @@ impl<'py, T: IntoPyArrow> IntoPyObject<'py> for PyArrowType { type Error = PyErr; - fn into_pyobject(self, py: Python<'py>) -> Result { + fn into_pyobject(self, py: Python<'py>) -> PyResult { self.0.into_pyarrow(py) } } diff --git a/arrow-row/src/lib.rs b/arrow-row/src/lib.rs index 9679c89b4807..078c4574775d 100644 --- a/arrow-row/src/lib.rs +++ b/arrow-row/src/lib.rs @@ -1414,9 +1414,12 @@ impl DoubleEndedIterator for RowsIter<'_> { if self.end == self.start { return None; } - // Safety: We have checked that `start` is less than `end` - let row = unsafe { self.rows.row_unchecked(self.end) }; + self.end -= 1; + + // Safety: By construction we create `end >= start`, so if `end` is not equal to `start` it cannot be less than `start` + // therefore `end - 1` is within range + let row = unsafe { self.rows.row_unchecked(self.end) }; Some(row) } } @@ -5651,4 +5654,40 @@ mod tests { .contains("not yet implemented") ); } + + #[test] + fn empty_row_iter_next_back() { + let rows = RowConverter::new(vec![SortField::new(DataType::UInt8)]) + .unwrap() + .empty_rows(0, 0); + let mut rows_iter = rows.iter(); + assert_eq!(rows_iter.next_back(), None); + assert_eq!(rows_iter.next_back(), None); + assert_eq!(rows_iter.next_back(), None); + } + + #[test] + fn row_iter_next_back() { + let row_converter = RowConverter::new(vec![SortField::new(DataType::UInt8)]).unwrap(); + let mut rng = StdRng::seed_from_u64(42); + let array = generate_primitive_array::(&mut rng, 100, 0.8); + let rows = row_converter.convert_columns(&[Arc::new(array)]).unwrap(); + + let mut rows_iter = rows.iter(); + let mut bytes: Vec = vec![]; + + while let Some(row) = rows_iter.next_back() { + bytes.extend(row.data.iter().rev()); + } + + bytes.reverse(); + + assert_eq!( + bytes, + &rows.buffer.as_slice()[..*rows.offsets.last().unwrap()] + ); + + assert_eq!(rows_iter.next_back(), None); + assert_eq!(rows_iter.next(), None); + } } diff --git a/arrow-schema/Cargo.toml b/arrow-schema/Cargo.toml index fb6461a9e9ae..2991e2aa46b6 100644 --- a/arrow-schema/Cargo.toml +++ b/arrow-schema/Cargo.toml @@ -54,7 +54,7 @@ all-features = true [dev-dependencies] criterion = { workspace = true, default-features = false } -insta = "1.43.1" +insta = { workspace = true, default-features = true } postcard = { version = "1.0.10", default-features = false, features = ["use-std"] } [[bench]] diff --git a/arrow-schema/src/field.rs b/arrow-schema/src/field.rs index a1c509abf2e0..1f2b57564ded 100644 --- a/arrow-schema/src/field.rs +++ b/arrow-schema/src/field.rs @@ -830,6 +830,9 @@ impl Field { .try_for_each(|f| builder.try_merge(f))?; *nested_fields = builder.finish().fields; } + DataType::Null => { + self.nullable = true; + } _ => { return Err(ArrowError::SchemaError(format!( "Fail to merge schema field '{}' because the from data_type = {} is not DataType::Struct", @@ -841,6 +844,9 @@ impl Field { DataType::Union(from_nested_fields, _) => { nested_fields.try_merge(from_nested_fields)? 
} + DataType::Null => { + self.nullable = true; + } _ => { return Err(ArrowError::SchemaError(format!( "Fail to merge schema field '{}' because the from data_type = {} is not DataType::Union", @@ -854,6 +860,9 @@ impl Field { f.try_merge(from_field)?; (*field) = Arc::new(f); } + DataType::Null => { + self.nullable = true; + } _ => { return Err(ArrowError::SchemaError(format!( "Fail to merge schema field '{}' because the from data_type = {} is not DataType::List", @@ -867,6 +876,9 @@ impl Field { f.try_merge(from_field)?; (*field) = Arc::new(f); } + DataType::Null => { + self.nullable = true; + } _ => { return Err(ArrowError::SchemaError(format!( "Fail to merge schema field '{}' because the from data_type = {} is not DataType::LargeList", @@ -1461,4 +1473,58 @@ mod test { assert_binary_serde_round_trip(field) } + + #[test] + fn test_merge_compound_with_null() { + // Struct + Null + let mut field = Field::new( + "s", + DataType::Struct(Fields::from(vec![Field::new("a", DataType::Int32, false)])), + false, + ); + field + .try_merge(&Field::new("s", DataType::Null, true)) + .expect("Struct should merge with Null"); + assert!(field.is_nullable()); + assert!(matches!(field.data_type(), DataType::Struct(_))); + + // List + Null + let mut field = Field::new( + "l", + DataType::List(Field::new("item", DataType::Utf8, false).into()), + false, + ); + field + .try_merge(&Field::new("l", DataType::Null, true)) + .expect("List should merge with Null"); + assert!(field.is_nullable()); + assert!(matches!(field.data_type(), DataType::List(_))); + + // LargeList + Null + let mut field = Field::new( + "ll", + DataType::LargeList(Field::new("item", DataType::Utf8, false).into()), + false, + ); + field + .try_merge(&Field::new("ll", DataType::Null, true)) + .expect("LargeList should merge with Null"); + assert!(field.is_nullable()); + assert!(matches!(field.data_type(), DataType::LargeList(_))); + + // Union + Null + let mut field = Field::new( + "u", + DataType::Union( + UnionFields::try_new(vec![0], vec![Field::new("f", DataType::Int32, false)]) + .unwrap(), + UnionMode::Dense, + ), + false, + ); + field + .try_merge(&Field::new("u", DataType::Null, true)) + .expect("Union should merge with Null"); + assert!(matches!(field.data_type(), DataType::Union(_, _))); + } } diff --git a/arrow-select/src/coalesce/byte_view.rs b/arrow-select/src/coalesce/byte_view.rs index bca811fff1c6..6062cd5e77aa 100644 --- a/arrow-select/src/coalesce/byte_view.rs +++ b/arrow-select/src/coalesce/byte_view.rs @@ -101,7 +101,6 @@ impl InProgressByteViewArray { if self.views.capacity() == 0 { self.views.reserve(self.batch_size); } - debug_assert_eq!(self.views.capacity(), self.batch_size); } /// Finishes in progress buffer, if any diff --git a/arrow-select/src/coalesce/primitive.rs b/arrow-select/src/coalesce/primitive.rs index 69dad221bd52..a7f2fb32ce49 100644 --- a/arrow-select/src/coalesce/primitive.rs +++ b/arrow-select/src/coalesce/primitive.rs @@ -58,7 +58,6 @@ impl InProgressPrimitiveArray { if self.current.capacity() == 0 { self.current.reserve(self.batch_size); } - debug_assert_eq!(self.current.capacity(), self.batch_size); } } diff --git a/arrow-select/src/interleave.rs b/arrow-select/src/interleave.rs index 6598a5eb0da0..f5904bc171ee 100644 --- a/arrow-select/src/interleave.rs +++ b/arrow-select/src/interleave.rs @@ -154,13 +154,54 @@ fn interleave_primitive( data_type: &DataType, ) -> Result { let interleaved = Interleave::<'_, PrimitiveArray>::new(values, indices); + let arrays = &interleaved.arrays; + let len = 
indices.len(); + + let mut output = Vec::with_capacity(len); + let dst: *mut T::Native = output.as_mut_ptr(); + let mut base = 0; + + // Process 8 elements at a time to issue multiple independent loads + // and increase memory-level parallelism for random access patterns. + let chunks = indices.chunks_exact(8); + let remainder = chunks.remainder(); + for chunk in chunks { + let v0 = arrays[chunk[0].0].value(chunk[0].1); + let v1 = arrays[chunk[1].0].value(chunk[1].1); + let v2 = arrays[chunk[2].0].value(chunk[2].1); + let v3 = arrays[chunk[3].0].value(chunk[3].1); + let v4 = arrays[chunk[4].0].value(chunk[4].1); + let v5 = arrays[chunk[5].0].value(chunk[5].1); + let v6 = arrays[chunk[6].0].value(chunk[6].1); + let v7 = arrays[chunk[7].0].value(chunk[7].1); + + // SAFETY: base+7 < len == output capacity + debug_assert!(base + 7 < len); + unsafe { + dst.add(base).write(v0); + dst.add(base + 1).write(v1); + dst.add(base + 2).write(v2); + dst.add(base + 3).write(v3); + dst.add(base + 4).write(v4); + dst.add(base + 5).write(v5); + dst.add(base + 6).write(v6); + dst.add(base + 7).write(v7); + } + base += 8; + } - let values = indices - .iter() - .map(|(a, b)| interleaved.arrays[*a].value(*b)) - .collect::>(); + for idx in remainder { + // SAFETY: base < len == output capacity + debug_assert!(base < len); + unsafe { dst.add(base).write(arrays[idx.0].value(idx.1)) }; + base += 1; + } + + // SAFETY: all `len` elements have been initialized + debug_assert!(base == len); + unsafe { output.set_len(len) }; - let array = PrimitiveArray::::try_new(values.into(), interleaved.nulls)?; + let array = PrimitiveArray::::try_new(output.into(), interleaved.nulls)?; Ok(Arc::new(array.with_data_type(data_type.clone()))) } @@ -173,12 +214,15 @@ fn interleave_bytes( let mut capacity = 0; let mut offsets = Vec::with_capacity(indices.len() + 1); offsets.push(T::Offset::from_usize(0).unwrap()); - offsets.extend(indices.iter().map(|(a, b)| { + for (a, b) in indices { let o = interleaved.arrays[*a].value_offsets(); let element_len = o[*b + 1].as_usize() - o[*b].as_usize(); capacity += element_len; - T::Offset::from_usize(capacity).expect("overflow") - })); + offsets.push( + T::Offset::from_usize(capacity) + .ok_or_else(|| ArrowError::OffsetOverflowError(capacity))?, + ); + } let mut values = Vec::with_capacity(capacity); for (a, b) in indices { @@ -331,12 +375,14 @@ fn interleave_list( let mut capacity = 0usize; let mut offsets = Vec::with_capacity(indices.len() + 1); offsets.push(O::from_usize(0).unwrap()); - offsets.extend(indices.iter().map(|(array, row)| { + for (array, row) in indices { let o = interleaved.arrays[*array].value_offsets(); let element_len = o[*row + 1].as_usize() - o[*row].as_usize(); capacity += element_len; - O::from_usize(capacity).expect("offset overflow") - })); + offsets.push( + O::from_usize(capacity).ok_or_else(|| ArrowError::OffsetOverflowError(capacity))?, + ); + } let mut child_indices = Vec::with_capacity(capacity); for (array, row) in indices { @@ -536,6 +582,7 @@ mod tests { use arrow_array::Int32RunArray; use arrow_array::builder::{GenericListBuilder, Int32Builder, PrimitiveRunBuilder}; use arrow_array::types::Int8Type; + use arrow_buffer::ScalarBuffer; use arrow_schema::Field; #[test] @@ -1414,4 +1461,87 @@ mod tests { ] ); } + + #[test] + fn test_interleave_bytes_offset_overflow() { + let indices: Vec<(usize, usize)> = vec![(0, 0); (i32::MAX >> 4) as usize]; + let text = ('a'..='z').collect::(); + let values = StringArray::from(vec![Some(text)]); + assert!(matches!( + 
interleave(&[&values], &indices), + Err(ArrowError::OffsetOverflowError(_)) + )); + } + + #[test] + fn test_interleave_list_offset_overflow() { + // Build a ListArray with a single row containing many elements + let mut builder = GenericListBuilder::::new(Int32Builder::new()); + for i in 0..32 { + builder.values().append_value(i); + } + builder.append(true); + let list = builder.finish(); + + // Interleave enough copies to overflow i32 offsets + let indices: Vec<(usize, usize)> = vec![(0, 0); (i32::MAX as usize / 32) + 1]; + assert!(matches!( + interleave(&[&list], &indices), + Err(ArrowError::OffsetOverflowError(_)) + )); + } + + #[test] + fn test_interleave_list_view() { + // `interleave` for ListView falls through to `interleave_fallback`, which uses + // `MutableArrayData`. `list_view::build_extend` copies offsets/sizes but never + // extends the child array, so the result contains offsets/sizes that reference + // positions in the now-absent original child arrays while the child is empty. + // + // lv_a: [[1, 2], [3]] (values=[1,2,3], offsets=[0,2], sizes=[2,1]) + // lv_b: [[4, 5, 6]] (values=[4,5,6], offsets=[0], sizes=[3]) + // interleave at [(0,0), (1,0), (0,1)] should produce [[1, 2], [4, 5, 6], [3]] + let field = Arc::new(Field::new_list_field(DataType::Int64, false)); + + let lv_a = ListViewArray::new( + Arc::clone(&field), + ScalarBuffer::from(vec![0i32, 2]), + ScalarBuffer::from(vec![2i32, 1]), + Arc::new(Int64Array::from(vec![1_i64, 2, 3])), + None, + ); + let lv_b = ListViewArray::new( + field, + ScalarBuffer::from(vec![0i32]), + ScalarBuffer::from(vec![3i32]), + Arc::new(Int64Array::from(vec![4_i64, 5, 6])), + None, + ); + + let result = interleave( + &[&lv_a as &dyn Array, &lv_b as &dyn Array], + &[(0, 0), (1, 0), (0, 1)], + ) + .unwrap(); + + result + .to_data() + .validate_full() + .expect("interleaved ListViewArray must be internally consistent"); + + let result_lv = result.as_list_view::(); + assert_eq!(result_lv.len(), 3); + assert_eq!( + result_lv.value(0).as_primitive::().values(), + &[1, 2] + ); + assert_eq!( + result_lv.value(1).as_primitive::().values(), + &[4, 5, 6] + ); + assert_eq!( + result_lv.value(2).as_primitive::().values(), + &[3] + ); + } } diff --git a/arrow-select/src/take.rs b/arrow-select/src/take.rs index 3e34e794f11f..ee813f5353c2 100644 --- a/arrow-select/src/take.rs +++ b/arrow-select/src/take.rs @@ -18,6 +18,7 @@ //! 
Defines take kernel for [Array]
 
 use std::fmt::Display;
+use std::mem::ManuallyDrop;
 use std::sync::Arc;
 
 use arrow_array::builder::{BufferBuilder, UInt32Builder};
@@ -415,10 +416,10 @@ fn take_nulls<IndexType: ArrowPrimitiveType>(
     indices: &PrimitiveArray<IndexType>,
 ) -> Option<NullBuffer> {
     match values.filter(|n| n.null_count() > 0) {
-        Some(n) => {
-            let buffer = take_bits(n.inner(), indices);
-            Some(NullBuffer::new(buffer)).filter(|n| n.null_count() > 0)
-        }
+        Some(n) => NullBuffer::from_unsliced_buffer(
+            take_bits(n.inner(), indices).into_inner(),
+            indices.len(),
+        ),
         None => indices.nulls().cloned(),
     }
 }
@@ -723,46 +724,127 @@ fn take_fixed_size_binary<IndexType: ArrowPrimitiveType>(
         ArrowError::InvalidArgumentError(format!("Cannot convert size '{}' to usize", size))
     })?;
 
-    let values_buffer = values.values().as_slice();
-    let mut values_buffer_builder = BufferBuilder::new(indices.len() * size_usize);
-
-    if indices.null_count() == 0 {
-        let array_iter = indices.values().iter().map(|idx| {
-            let offset = idx.as_usize() * size_usize;
-            &values_buffer[offset..offset + size_usize]
-        });
-        for slice in array_iter {
-            values_buffer_builder.append_slice(slice);
-        }
-    } else {
-        // The indices nullability cannot be ignored here because the values buffer may contain
-        // nulls which should not cause a panic.
-        let array_iter = indices.iter().map(|idx| {
-            idx.map(|idx| {
-                let offset = idx.as_usize() * size_usize;
-                &values_buffer[offset..offset + size_usize]
-            })
-        });
-        for slice in array_iter {
-            match slice {
-                None => values_buffer_builder.append_n(size_usize, 0),
-                Some(slice) => values_buffer_builder.append_slice(slice),
-            }
-        }
-    }
+    let result_buffer = match size_usize {
+        1 => take_fixed_size::<1, _>(values.values(), indices),
+        2 => take_fixed_size::<2, _>(values.values(), indices),
+        4 => take_fixed_size::<4, _>(values.values(), indices),
+        8 => take_fixed_size::<8, _>(values.values(), indices),
+        16 => take_fixed_size::<16, _>(values.values(), indices),
+        _ => take_fixed_size_binary_buffer_dynamic_length(values, indices, size_usize),
+    };
 
-    let values_buffer = values_buffer_builder.finish();
     let value_nulls = take_nulls(values.nulls(), indices);
     let final_nulls = NullBuffer::union(value_nulls.as_ref(), indices.nulls());
-
     let array_data = ArrayDataBuilder::new(DataType::FixedSizeBinary(size))
         .len(indices.len())
         .nulls(final_nulls)
         .offset(0)
-        .add_buffer(values_buffer)
+        .add_buffer(result_buffer)
         .build()?;
-    Ok(FixedSizeBinaryArray::from(array_data))
+    return Ok(FixedSizeBinaryArray::from(array_data));
+
+    /// Implementation of the take kernel for fixed size binary arrays.
+    #[inline(never)]
+    fn take_fixed_size_binary_buffer_dynamic_length<IndexType: ArrowPrimitiveType>(
+        values: &FixedSizeBinaryArray,
+        indices: &PrimitiveArray<IndexType>,
+        size_usize: usize,
+    ) -> Buffer {
+        let values_buffer = values.values().as_slice();
+        let mut values_buffer_builder = BufferBuilder::new(indices.len() * size_usize);
+
+        if indices.null_count() == 0 {
+            let array_iter = indices.values().iter().map(|idx| {
+                let offset = idx.as_usize() * size_usize;
+                &values_buffer[offset..offset + size_usize]
+            });
+            for slice in array_iter {
+                values_buffer_builder.append_slice(slice);
+            }
+        } else {
+            // The indices nullability cannot be ignored here because the values buffer may contain
+            // nulls which should not cause a panic.
+            let array_iter = indices.iter().map(|idx| {
+                idx.map(|idx| {
+                    let offset = idx.as_usize() * size_usize;
+                    &values_buffer[offset..offset + size_usize]
+                })
+            });
+            for slice in array_iter {
+                match slice {
+                    None => values_buffer_builder.append_n(size_usize, 0),
+                    Some(slice) => values_buffer_builder.append_slice(slice),
+                }
+            }
+        }
+
+        values_buffer_builder.finish()
+    }
+}
+
+/// Implements the take kernel semantics over a flat [`Buffer`], interpreting it as a slice of
+/// `&[[u8; N]]`, where `N` is a compile-time constant. The usage of a flat [`Buffer`] allows using
+/// this kernel without an available [`ArrowPrimitiveType`] (e.g., for `[u8; 5]`).
+///
+/// # Using This Function in the Primitive Take Kernel
+///
+/// This function is basically the same as [`take_native`] but just on a flat [`Buffer`] instead of
+/// the primitive [`ScalarBuffer`]. Ideally, the [`take_primitive`] kernel should just use this
+/// more general function. However, the "idiomatic code" requires the
+/// [feature(generic_const_exprs)](https://github.com/rust-lang/rust/issues/76560) for calling
+/// `take_fixed_size::<{ size_of::<T::Native>() }>(...)`. Once this feature has been stabilized,
+/// we can use this function also in the primitive kernels.
+fn take_fixed_size<const N: usize, IndexType: ArrowPrimitiveType>(
+    buffer: &Buffer,
+    indices: &PrimitiveArray<IndexType>,
+) -> Buffer {
+    assert_eq!(
+        buffer.len() % N,
+        0,
+        "Invalid array length in take_fixed_size"
+    );
+
+    let ptr = buffer.as_ptr();
+    let chunk_ptr = ptr.cast::<[u8; N]>();
+    let chunk_len = buffer.len() / N;
+    let buffer: &[[u8; N]] = unsafe {
+        // SAFETY: interpret an already valid slice as a slice of N-byte chunks. N divides buffer
+        // length without remainder.
+        std::slice::from_raw_parts(chunk_ptr, chunk_len)
+    };
+
+    let result_buffer = match indices.nulls().filter(|n| n.null_count() > 0) {
+        Some(n) => indices
+            .values()
+            .iter()
+            .enumerate()
+            .map(|(idx, index)| match buffer.get(index.as_usize()) {
+                Some(v) => *v,
+                // SAFETY: idx < indices.len()
+                None => match unsafe { n.inner().value_unchecked(idx) } {
+                    false => [0u8; N],
+                    true => panic!("Out-of-bounds index {index:?}"),
+                },
+            })
+            .collect::<Vec<_>>(),
+        None => indices
+            .values()
+            .iter()
+            .map(|index| buffer[index.as_usize()])
+            .collect::<Vec<_>>(),
+    };
+
+    let mut vec = ManuallyDrop::new(result_buffer); // Prevent de-allocation
+    let ptr = vec.as_mut_ptr();
+    let len = vec.len();
+    let cap = vec.capacity();
+    let result_buffer = unsafe {
+        // SAFETY: flattening an already valid Vec.
+        Vec::from_raw_parts(ptr.cast::<u8>(), len * N, cap * N)
+    };
+
+    Buffer::from_vec(result_buffer)
 }
 
 /// `take` implementation for dictionary arrays
@@ -2150,6 +2232,35 @@ mod tests {
         );
     }
 
+    /// The [`take_fixed_size_binary`] kernel contains optimizations that provide a faster
+    /// implementation for commonly-used value lengths. This test uses a value length that is not
+    /// optimized to test both code paths.
+    #[test]
+    fn test_take_fixed_size_binary_with_nulls_indices_not_optimized_length() {
+        let fsb = FixedSizeBinaryArray::try_from_sparse_iter_with_size(
+            [
+                Some(vec![0x01, 0x01, 0x01, 0x01, 0x01]),
+                Some(vec![0x02, 0x02, 0x02, 0x02, 0x01]),
+                Some(vec![0x03, 0x03, 0x03, 0x03, 0x01]),
+                Some(vec![0x04, 0x04, 0x04, 0x04, 0x01]),
+            ]
+            .into_iter(),
+            5,
+        )
+        .unwrap();
+
+        // The two middle indices are null -> Should be null in the output.
+ let indices = UInt32Array::from(vec![Some(0), None, None, Some(3)]); + + let result = take_fixed_size_binary(&fsb, &indices, 5).unwrap(); + assert_eq!(result.len(), 4); + assert_eq!(result.null_count(), 2); + assert_eq!( + result.nulls().unwrap().iter().collect::>(), + vec![true, false, false, true] + ); + } + #[test] #[should_panic(expected = "index out of bounds: the len is 4 but the index is 1000")] fn test_take_list_out_of_bounds() { diff --git a/arrow-string/src/regexp.rs b/arrow-string/src/regexp.rs index ad678598ea6c..07520a209095 100644 --- a/arrow-string/src/regexp.rs +++ b/arrow-string/src/regexp.rs @@ -203,8 +203,7 @@ where let nulls = array .nulls() .map(|n| n.inner().sliced()) - .map(|b| NullBuffer::new(BooleanBuffer::new(b, 0, array.len()))) - .filter(|n| n.null_count() > 0); + .and_then(|b| NullBuffer::from_unsliced_buffer(b, array.len())); Ok(BooleanArray::new(values, nulls)) } diff --git a/arrow-string/src/substring.rs b/arrow-string/src/substring.rs index 96858ee11763..05b3888a444a 100644 --- a/arrow-string/src/substring.rs +++ b/arrow-string/src/substring.rs @@ -22,7 +22,7 @@ use arrow_array::builder::BufferBuilder; use arrow_array::types::*; use arrow_array::*; -use arrow_buffer::{ArrowNativeType, BooleanBuffer, MutableBuffer, NullBuffer, OffsetBuffer}; +use arrow_buffer::{ArrowNativeType, MutableBuffer, NullBuffer, OffsetBuffer}; use arrow_schema::{ArrowError, DataType}; use num_traits::Zero; use std::cmp::Ordering; @@ -216,8 +216,7 @@ pub fn substring_by_char( let nulls = array .nulls() .map(|n| n.inner().sliced()) - .map(|b| NullBuffer::new(BooleanBuffer::new(b, 0, array.len()))) - .filter(|n| n.null_count() > 0); + .and_then(|b| NullBuffer::from_unsliced_buffer(b, array.len())); Ok(GenericStringArray::::new( offsets, values, nulls, )) @@ -318,8 +317,7 @@ where let nulls = array .nulls() .map(|n| n.inner().sliced()) - .map(|b| NullBuffer::new(BooleanBuffer::new(b, 0, array.len()))) - .filter(|n| n.null_count() > 0); + .and_then(|b| NullBuffer::from_unsliced_buffer(b, array.len())); Ok(Arc::new(GenericByteArray::::new(offsets, values, nulls))) } @@ -356,8 +354,8 @@ fn fixed_size_binary_substring( let mut nulls = array .nulls() .map(|n| n.inner().sliced()) - .map(|b| NullBuffer::new(BooleanBuffer::new(b, 0, num_of_elements))) - .filter(|n| n.null_count() > 0); + .and_then(|b| NullBuffer::from_unsliced_buffer(b, num_of_elements)); + if new_len == 0 && nulls.is_none() { // FixedSizeBinaryArray::new takes length from the values buffer, except when size == 0. // In that case it uses the null buffer length, so preserve the original length here. @@ -365,6 +363,7 @@ fn fixed_size_binary_substring( // otherwise it collapses to an empty array (len=0). nulls = Some(NullBuffer::new_valid(num_of_elements)); } + Ok(Arc::new(FixedSizeBinaryArray::new( new_len, new_values.into(), @@ -375,6 +374,7 @@ fn fixed_size_binary_substring( #[cfg(test)] mod tests { use super::*; + use arrow_buffer::BooleanBuffer; use arrow_buffer::Buffer; /// A helper macro to generate test cases. 
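
The take, regexp, and substring call sites above all converge on the same `NullBuffer::from_unsliced_buffer` pattern. A hedged sketch of it in isolation; I am assuming the `(Buffer, usize) -> Option<NullBuffer>` shape used in the diff, returning `None` when the bitmap contains no nulls:

```rust
use arrow_buffer::{BooleanBuffer, Buffer, NullBuffer};

fn normalized_nulls(nulls: Option<&NullBuffer>, len: usize) -> Option<NullBuffer> {
    nulls
        .map(|n| n.inner().sliced()) // re-align the validity bitmap to offset 0
        .and_then(|b: Buffer| NullBuffer::from_unsliced_buffer(b, len))
}

fn main() {
    // An all-valid bitmap collapses to None, so the array can drop it entirely
    // instead of carrying a redundant validity buffer.
    let all_valid = NullBuffer::new(BooleanBuffer::new_set(4));
    assert!(normalized_nulls(Some(&all_valid), 4).is_none());
}
```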
diff --git a/arrow/Cargo.toml b/arrow/Cargo.toml index 137d785eee88..8e56457ff0a5 100644 --- a/arrow/Cargo.toml +++ b/arrow/Cargo.toml @@ -82,6 +82,8 @@ force_validate = ["arrow-array/force_validate", "arrow-data/force_validate"] ffi = ["arrow-schema/ffi", "arrow-data/ffi", "arrow-array/ffi"] chrono-tz = ["arrow-array/chrono-tz"] canonical_extension_types = ["arrow-schema/canonical_extension_types"] +# Enable memory tracking support +pool = ["arrow-array/pool"] [dev-dependencies] chrono = { workspace = true } diff --git a/arrow/benches/take_kernels.rs b/arrow/benches/take_kernels.rs index 37b83a5e33ed..fb231771681c 100644 --- a/arrow/benches/take_kernels.rs +++ b/arrow/benches/take_kernels.rs @@ -195,14 +195,26 @@ fn add_benchmark(c: &mut Criterion) { let values = create_fsb_array(1024, 0.0, 12); let indices = create_random_index(1024, 0.0); - c.bench_function("take primitive fsb value len: 12, indices: 1024", |b| { + c.bench_function("take fsb value len: 12, indices: 1024", |b| { b.iter(|| bench_take(&values, &indices)) }); let values = create_fsb_array(1024, 0.5, 12); let indices = create_random_index(1024, 0.0); + c.bench_function("take fsb value len: 12, null values, indices: 1024", |b| { + b.iter(|| bench_take(&values, &indices)) + }); + + let values = create_fsb_array(1024, 0.0, 16); + let indices = create_random_index(1024, 0.0); + c.bench_function("take fsb value optimized len: 16, indices: 1024", |b| { + b.iter(|| bench_take(&values, &indices)) + }); + + let values = create_fsb_array(1024, 0.5, 16); + let indices = create_random_index(1024, 0.0); c.bench_function( - "take primitive fsb value len: 12, null values, indices: 1024", + "take fsb value optimized len: 16, null values, indices: 1024", |b| b.iter(|| bench_take(&values, &indices)), ); } diff --git a/arrow/tests/arithmetic.rs b/arrow/tests/arithmetic.rs index cc6a97e123f8..5d024f715a1e 100644 --- a/arrow/tests/arithmetic.rs +++ b/arrow/tests/arithmetic.rs @@ -76,7 +76,7 @@ fn test_timestamp_with_timezone_impl(tz_str: &str) { .naive_utc(), ] .into_iter() - .map(|x| T::make_value(x).unwrap()) + .map(|x| T::from_naive_datetime(x, None).unwrap()) .collect(); let a = PrimitiveArray::::new(values, None).with_timezone(tz_str); diff --git a/arrow/tests/array_transform.rs b/arrow/tests/array_transform.rs index 511dc1e8bfcd..c24d0992a473 100644 --- a/arrow/tests/array_transform.rs +++ b/arrow/tests/array_transform.rs @@ -1151,3 +1151,91 @@ fn test_fixed_size_list_append() { .unwrap(); assert_eq!(finished, expected_fixed_size_list_data); } + +#[test] +fn test_extend_nulls_sparse_union() { + let fields = UnionFields::try_new( + vec![0, 1], + vec![ + Field::new("null", DataType::Null, true), + Field::new("str", DataType::Utf8, true), + ], + ) + .unwrap(); + + let type_ids = ScalarBuffer::from(vec![1i8]); + let child_null = Arc::new(NullArray::new(1)) as ArrayRef; + let child_str = Arc::new(StringArray::from(vec![Some("hello")])) as ArrayRef; + let union_array = UnionArray::try_new( + fields.clone(), + type_ids, + None, // sparse + vec![child_null, child_str], + ) + .unwrap(); + + let data = union_array.to_data(); + let mut mutable = MutableArrayData::new(vec![&data], true, 4); + mutable.extend(0, 0, 1); // copy the first element + mutable.extend_nulls(2); // add two nulls + let result = mutable.freeze(); + + // Union arrays must not have a null bitmap per Arrow spec + assert!(result.nulls().is_none()); + + let result_array = UnionArray::from(result); + assert_eq!(result_array.len(), 3); + // First element should be type_id 1 (str) + 
assert_eq!(result_array.type_id(0), 1); + // Null elements use the first type_id (0) + assert_eq!(result_array.type_id(1), 0); + assert_eq!(result_array.type_id(2), 0); + // All children should have length 3 (sparse invariant) + assert_eq!(result_array.child(0).len(), 3); + assert_eq!(result_array.child(1).len(), 3); +} + +#[test] +fn test_extend_nulls_dense_union() { + let fields = UnionFields::try_new( + vec![0, 1], + vec![ + Field::new("i", DataType::Int32, true), + Field::new("str", DataType::Utf8, true), + ], + ) + .unwrap(); + + let type_ids = ScalarBuffer::from(vec![1i8]); + let offsets = ScalarBuffer::from(vec![0i32]); + let child_int = Arc::new(Int32Array::new_null(0)) as ArrayRef; + let child_str = Arc::new(StringArray::from(vec![Some("hello")])) as ArrayRef; + let union_array = UnionArray::try_new( + fields.clone(), + type_ids, + Some(offsets), + vec![child_int, child_str], + ) + .unwrap(); + + let data = union_array.to_data(); + let mut mutable = MutableArrayData::new(vec![&data], true, 4); + mutable.extend(0, 0, 1); // copy the first element + mutable.extend_nulls(2); // add two nulls + let result = mutable.freeze(); + + // Union arrays must not have a null bitmap per Arrow spec + assert!(result.nulls().is_none()); + + let result_array = UnionArray::from(result); + assert_eq!(result_array.len(), 3); + // First element is type_id 1 (str) + assert_eq!(result_array.type_id(0), 1); + // Null elements use the first type_id (0) + assert_eq!(result_array.type_id(1), 0); + assert_eq!(result_array.type_id(2), 0); + // First child (int) should have 2 null entries from extend_nulls + assert_eq!(result_array.child(0).len(), 2); + // Second child (str) should have 1 entry from extend + assert_eq!(result_array.child(1).len(), 1); +} diff --git a/dev/release/update_change_log.sh b/dev/release/update_change_log.sh index b444cae64911..ae7bf81b0cca 100755 --- a/dev/release/update_change_log.sh +++ b/dev/release/update_change_log.sh @@ -29,8 +29,8 @@ set -e -SINCE_TAG="57.3.0" -FUTURE_RELEASE="58.0.0" +SINCE_TAG="58.0.0" +FUTURE_RELEASE="58.1.0" SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" SOURCE_TOP_DIR="$(cd "${SOURCE_DIR}/../../" && pwd)" diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index 2629d362aaff..d8b888effef1 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -33,23 +33,27 @@ set -o pipefail SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]:-$0}")" && pwd)" ARROW_DIR="$(dirname $(dirname ${SOURCE_DIR}))" -ARROW_DIST_URL='https://dist.apache.org/repos/dist/dev/arrow' +ARROW_RC_URL="https://dist.apache.org/repos/dist/dev/arrow" +ARROW_KEYS_URL="https://www.apache.org/dyn/closer.lua?action=download&filename=arrow/KEYS" -download_dist_file() { +download_file() { curl \ --silent \ --show-error \ --fail \ --location \ - --remote-name $ARROW_DIST_URL/$1 + --output "$2" \ + "$1" } download_rc_file() { - download_dist_file apache-arrow-rs-${VERSION}-rc${RC_NUMBER}/$1 + download_file \ + "${ARROW_RC_URL}/apache-arrow-rs-${VERSION}-rc${RC_NUMBER}/$1" \ + "$1" } import_gpg_keys() { - download_dist_file KEYS + download_file "${ARROW_KEYS_URL}" KEYS gpg --import KEYS } diff --git a/parquet-variant-compute/src/arrow_to_variant.rs b/parquet-variant-compute/src/arrow_to_variant.rs index be241a9a4e00..03a84109ffa0 100644 --- a/parquet-variant-compute/src/arrow_to_variant.rs +++ b/parquet-variant-compute/src/arrow_to_variant.rs @@ -16,8 +16,8 @@ // under the License. 
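
Backing up to the two union `extend_nulls` tests above: the extend / extend_nulls / freeze sequence they exercise is the generic `MutableArrayData` transform API. A minimal sketch of the same calls on a plain Int32 array, for readers unfamiliar with that API:

```rust
use arrow::array::{Array, Int32Array, MutableArrayData};

fn main() {
    let source = Int32Array::from(vec![10, 20, 30]);
    let data = source.to_data();

    // `true` enables null tracking; 4 is a capacity hint.
    let mut mutable = MutableArrayData::new(vec![&data], true, 4);
    mutable.extend(0, 0, 1); // copy rows 0..1 from source array 0
    mutable.extend_nulls(2); // then append two nulls

    let result = Int32Array::from(mutable.freeze());
    assert_eq!(result.len(), 3);
    assert_eq!(result.value(0), 10);
    assert_eq!(result.null_count(), 2);
}
```

The union tests assert the same mechanics, plus the union-specific invariants (no top-level null bitmap, nulls encoded via the first type_id).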
use arrow::array::{ - Array, ArrayRef, AsArray, FixedSizeListArray, GenericBinaryArray, GenericListArray, - GenericListViewArray, GenericStringArray, OffsetSizeTrait, PrimitiveArray, + Array, AsArray, FixedSizeListArray, GenericBinaryArray, GenericListArray, GenericListViewArray, + GenericStringArray, ListLikeArray, OffsetSizeTrait, PrimitiveArray, }; use arrow::compute::{CastOptions, kernels::cast}; use arrow::datatypes::{ @@ -32,7 +32,6 @@ use parquet_variant::{ VariantDecimal16, VariantDecimalType, }; use std::collections::HashMap; -use std::ops::Range; // ============================================================================ // Row-oriented builders for efficient Arrow-to-Variant conversion @@ -552,54 +551,6 @@ impl<'a, L: ListLikeArray> ListArrowToVariantBuilder<'a, L> { } } -/// Trait for list-like arrays that can provide element ranges -pub(crate) trait ListLikeArray: Array { - /// Get the values array - fn values(&self) -> &ArrayRef; - - /// Get the start and end indices for a list element - fn element_range(&self, index: usize) -> Range; -} - -impl ListLikeArray for GenericListArray { - fn values(&self) -> &ArrayRef { - self.values() - } - - fn element_range(&self, index: usize) -> Range { - let offsets = self.offsets(); - let start = offsets[index].as_usize(); - let end = offsets[index + 1].as_usize(); - start..end - } -} - -impl ListLikeArray for GenericListViewArray { - fn values(&self) -> &ArrayRef { - self.values() - } - - fn element_range(&self, index: usize) -> Range { - let offsets = self.value_offsets(); - let sizes = self.value_sizes(); - let offset = offsets[index].as_usize(); - let size = sizes[index].as_usize(); - offset..(offset + size) - } -} - -impl ListLikeArray for FixedSizeListArray { - fn values(&self) -> &ArrayRef { - self.values() - } - - fn element_range(&self, index: usize) -> Range { - let value_length = self.value_length().as_usize(); - let offset = index * value_length; - offset..(offset + value_length) - } -} - /// Struct builder for StructArray pub(crate) struct StructArrowToVariantBuilder<'a> { struct_array: &'a arrow::array::StructArray, diff --git a/parquet-variant-compute/src/shred_variant.rs b/parquet-variant-compute/src/shred_variant.rs index c60c602baa37..d80d2f9863f6 100644 --- a/parquet-variant-compute/src/shred_variant.rs +++ b/parquet-variant-compute/src/shred_variant.rs @@ -84,7 +84,7 @@ pub fn shred_variant(array: &VariantArray, as_type: &DataType) -> Result Result nulls.append_null(), + Self::ObjectField | Self::ArrayElement => nulls.append_non_null(), + } + match self { + Self::TopLevelVariant | Self::ObjectField => value_builder.append_null(), + Self::ArrayElement => value_builder.append_value(Variant::Null), + } + } +} + pub(crate) fn make_variant_to_shredded_variant_arrow_row_builder<'a>( data_type: &'a DataType, cast_options: &'a CastOptions, capacity: usize, - top_level: bool, + null_value: NullValue, ) -> Result> { let builder = match data_type { DataType::Struct(fields) => { @@ -114,7 +145,7 @@ pub(crate) fn make_variant_to_shredded_variant_arrow_row_builder<'a>( fields, cast_options, capacity, - top_level, + null_value, )?; VariantToShreddedVariantRowBuilder::Object(typed_value_builder) } @@ -127,6 +158,7 @@ pub(crate) fn make_variant_to_shredded_variant_arrow_row_builder<'a>( data_type, cast_options, capacity, + null_value, )?; VariantToShreddedVariantRowBuilder::Array(typed_value_builder) } @@ -147,14 +179,16 @@ pub(crate) fn make_variant_to_shredded_variant_arrow_row_builder<'a>( | 
DataType::Timestamp(TimeUnit::Microsecond | TimeUnit::Nanosecond, _) | DataType::Binary | DataType::BinaryView + | DataType::LargeBinary | DataType::Utf8 | DataType::Utf8View + | DataType::LargeUtf8 | DataType::FixedSizeBinary(16) // UUID => { let builder = make_primitive_variant_to_arrow_row_builder(data_type, cast_options, capacity)?; let typed_value_builder = - VariantToShreddedPrimitiveVariantRowBuilder::new(builder, capacity, top_level); + VariantToShreddedPrimitiveVariantRowBuilder::new(builder, capacity, null_value); VariantToShreddedVariantRowBuilder::Primitive(typed_value_builder) } DataType::FixedSizeBinary(_) => { @@ -202,33 +236,31 @@ impl<'a> VariantToShreddedVariantRowBuilder<'a> { } } -/// A top-level variant shredder -- appending NULL produces typed_value=NULL and value=Variant::Null +/// A shredded primitive field builder. pub(crate) struct VariantToShreddedPrimitiveVariantRowBuilder<'a> { value_builder: VariantValueArrayBuilder, typed_value_builder: PrimitiveVariantToArrowRowBuilder<'a>, nulls: NullBufferBuilder, - top_level: bool, + null_value: NullValue, } impl<'a> VariantToShreddedPrimitiveVariantRowBuilder<'a> { pub(crate) fn new( typed_value_builder: PrimitiveVariantToArrowRowBuilder<'a>, capacity: usize, - top_level: bool, + null_value: NullValue, ) -> Self { Self { value_builder: VariantValueArrayBuilder::new(capacity), typed_value_builder, nulls: NullBufferBuilder::new(capacity), - top_level, + null_value, } } fn append_null(&mut self) -> Result<()> { - // Only the top-level struct that represents the variant can be nullable; object fields and - // array elements are non-nullable. - self.nulls.append(!self.top_level); - self.value_builder.append_null(); + self.null_value + .append_to(&mut self.nulls, &mut self.value_builder); self.typed_value_builder.append_null() } @@ -254,6 +286,8 @@ impl<'a> VariantToShreddedPrimitiveVariantRowBuilder<'a> { pub(crate) struct VariantToShreddedArrayVariantRowBuilder<'a> { value_builder: VariantValueArrayBuilder, typed_value_builder: ArrayVariantToArrowRowBuilder<'a>, + nulls: NullBufferBuilder, + null_value: NullValue, } impl<'a> VariantToShreddedArrayVariantRowBuilder<'a> { @@ -261,6 +295,7 @@ impl<'a> VariantToShreddedArrayVariantRowBuilder<'a> { data_type: &'a DataType, cast_options: &'a CastOptions, capacity: usize, + null_value: NullValue, ) -> Result { Ok(Self { value_builder: VariantValueArrayBuilder::new(capacity), @@ -269,11 +304,14 @@ impl<'a> VariantToShreddedArrayVariantRowBuilder<'a> { cast_options, capacity, )?, + nulls: NullBufferBuilder::new(capacity), + null_value, }) } fn append_null(&mut self) -> Result<()> { - self.value_builder.append_value(Variant::Null); + self.null_value + .append_to(&mut self.nulls, &mut self.value_builder); self.typed_value_builder.append_null()?; Ok(()) } @@ -283,12 +321,14 @@ impl<'a> VariantToShreddedArrayVariantRowBuilder<'a> { // If the variant is an array, value must be null. 
match variant { Variant::List(list) => { + self.nulls.append_non_null(); self.value_builder.append_null(); self.typed_value_builder .append_value(&Variant::List(list))?; Ok(true) } other => { + self.nulls.append_non_null(); self.value_builder.append_value(other); self.typed_value_builder.append_null()?; Ok(false) @@ -296,13 +336,11 @@ } } - fn finish(self) -> Result<(BinaryViewArray, ArrayRef, Option<NullBuffer>)> { + fn finish(mut self) -> Result<(BinaryViewArray, ArrayRef, Option<NullBuffer>)> { Ok(( self.value_builder.build()?, self.typed_value_builder.finish()?, - // All elements of an array must be present (not missing) because - // the array Variant encoding does not allow missing elements - None, + self.nulls.finish(), )) } } @@ -312,7 +350,7 @@ pub(crate) struct VariantToShreddedObjectVariantRowBuilder<'a> { typed_value_builders: IndexMap<&'a str, VariantToShreddedVariantRowBuilder<'a>>, typed_value_nulls: NullBufferBuilder, nulls: NullBufferBuilder, - top_level: bool, + null_value: NullValue, } impl<'a> VariantToShreddedObjectVariantRowBuilder<'a> { @@ -320,14 +358,14 @@ fields: &'a Fields, cast_options: &'a CastOptions, capacity: usize, - top_level: bool, + null_value: NullValue, ) -> Result<Self> { let typed_value_builders = fields.iter().map(|field| { let builder = make_variant_to_shredded_variant_arrow_row_builder( field.data_type(), cast_options, capacity, - false, + NullValue::ObjectField, )?; Ok((field.name().as_str(), builder)) }); @@ -336,15 +374,13 @@ typed_value_builders: typed_value_builders.collect::<Result<_>>()?, typed_value_nulls: NullBufferBuilder::new(capacity), nulls: NullBufferBuilder::new(capacity), - top_level, + null_value, }) } fn append_null(&mut self) -> Result<()> { - // Only the top-level struct that represents the variant can be nullable; object fields and - // array elements are non-nullable.
- self.nulls.append(!self.top_level); - self.value_builder.append_null(); + self.null_value + .append_to(&mut self.nulls, &mut self.value_builder); self.typed_value_nulls.append_null(); for (_, typed_value_builder) in &mut self.typed_value_builders { typed_value_builder.append_null()?; @@ -652,10 +688,10 @@ impl VariantSchemaNode { mod tests { use super::*; use crate::VariantArrayBuilder; - use crate::arrow_to_variant::ListLikeArray; use arrow::array::{ Array, BinaryViewArray, FixedSizeBinaryArray, Float64Array, GenericListArray, - GenericListViewArray, Int64Array, ListArray, OffsetSizeTrait, PrimitiveArray, StringArray, + GenericListViewArray, Int64Array, LargeBinaryArray, LargeStringArray, ListArray, + ListLikeArray, OffsetSizeTrait, PrimitiveArray, StringArray, }; use arrow::datatypes::{ ArrowPrimitiveType, DataType, Field, Fields, Int64Type, TimeUnit, UnionFields, UnionMode, @@ -667,6 +703,12 @@ mod tests { use std::sync::Arc; use uuid::Uuid; + const NULL_VALUES: [NullValue; 3] = [ + NullValue::TopLevelVariant, + NullValue::ObjectField, + NullValue::ArrayElement, + ]; + #[derive(Clone)] enum VariantValue<'a> { Value(Variant<'a, 'a>), @@ -879,7 +921,9 @@ mod tests { expected_variant.clone() ); } - None => unreachable!(), + None => { + assert!(fallbacks.0.is_null(idx)); + } } } } @@ -947,6 +991,121 @@ mod tests { } } + fn assert_append_null_mode_value_and_struct_nulls( + mode: NullValue, + value: &BinaryViewArray, + nulls: Option<&arrow::buffer::NullBuffer>, + ) { + if mode == NullValue::TopLevelVariant { + assert!(nulls.is_some_and(|n| n.is_null(0))); + } else { + assert!(nulls.is_none()); + } + + if mode == NullValue::ArrayElement { + assert!(value.is_valid(0)); + assert_eq!( + Variant::new(EMPTY_VARIANT_METADATA_BYTES, value.value(0)), + Variant::Null + ); + } else { + assert!(value.is_null(0)); + } + } + + #[test] + fn test_append_null_mode_semantics_primitive_builder() { + let cast_options = arrow::compute::CastOptions::default(); + + for mode in NULL_VALUES { + let mut primitive_builder = make_variant_to_shredded_variant_arrow_row_builder( + &DataType::Int64, + &cast_options, + 1, + mode, + ) + .unwrap(); + primitive_builder.append_null().unwrap(); + let (primitive_value, primitive_typed_value, primitive_nulls) = + primitive_builder.finish().unwrap(); + let primitive_typed_value = primitive_typed_value + .as_any() + .downcast_ref::() + .unwrap(); + + assert!(primitive_typed_value.is_null(0)); + assert_append_null_mode_value_and_struct_nulls( + mode, + &primitive_value, + primitive_nulls.as_ref(), + ); + } + } + + #[test] + fn test_append_null_mode_semantics_array_builder() { + let cast_options = arrow::compute::CastOptions::default(); + let list_type = DataType::List(Arc::new(Field::new("item", DataType::Int64, true))); + + for mode in NULL_VALUES { + let mut array_builder = make_variant_to_shredded_variant_arrow_row_builder( + &list_type, + &cast_options, + 1, + mode, + ) + .unwrap(); + array_builder.append_null().unwrap(); + let (value, typed_value, nulls) = array_builder.finish().unwrap(); + + assert_append_null_mode_value_and_struct_nulls(mode, &value, nulls.as_ref()); + + let typed_value = typed_value.as_any().downcast_ref::().unwrap(); + assert_eq!(typed_value.len(), 1); + assert!(typed_value.is_null(0)); + assert_eq!(typed_value.values().len(), 0); + } + } + + #[test] + fn test_append_null_mode_semantics_object_builder() { + let cast_options = arrow::compute::CastOptions::default(); + let object_type = DataType::Struct(Fields::from(vec![ + Field::new("id", DataType::Int64, 
true), + Field::new("name", DataType::Utf8, true), + ])); + + for mode in NULL_VALUES { + let mut object_builder = make_variant_to_shredded_variant_arrow_row_builder( + &object_type, + &cast_options, + 1, + mode, + ) + .unwrap(); + object_builder.append_null().unwrap(); + let (value, typed_value, nulls) = object_builder.finish().unwrap(); + + assert_append_null_mode_value_and_struct_nulls(mode, &value, nulls.as_ref()); + + let typed_struct = typed_value + .as_any() + .downcast_ref::() + .unwrap(); + assert_eq!(typed_struct.len(), 1); + assert!(typed_struct.is_null(0)); + + for field_name in ["id", "name"] { + let field = ShreddedVariantFieldArray::try_new( + typed_struct.column_by_name(field_name).unwrap(), + ) + .unwrap(); + assert!(field.value_field().unwrap().is_null(0)); + assert!(field.typed_value_field().unwrap().is_null(0)); + } + } + } + #[test] fn test_already_shredded_input_error() { // Create a VariantArray that already has typed_value_field @@ -1144,6 +1303,118 @@ mod tests { assert!(typed_value_float64.is_null(2)); // string doesn't convert } + #[test] + fn test_largeutf8_shredding() { + let input = VariantArray::from_iter(vec![ + Some(Variant::from("hello")), + Some(Variant::from(42i64)), + None, + Some(Variant::Null), + Some(Variant::from("world")), + ]); + + let result = shred_variant(&input, &DataType::LargeUtf8).unwrap(); + let metadata = result.metadata_field(); + let value = result.value_field().unwrap(); + let typed_value = result + .typed_value_field() + .unwrap() + .as_any() + .downcast_ref::() + .unwrap(); + + assert_eq!(result.len(), 5); + + // Row 0: string shreds to typed_value + assert!(result.is_valid(0)); + assert!(value.is_null(0)); + assert_eq!(typed_value.value(0), "hello"); + + // Row 1: integer falls back to value + assert!(result.is_valid(1)); + assert!(value.is_valid(1)); + assert!(typed_value.is_null(1)); + assert_eq!( + Variant::new(metadata.value(1), value.value(1)), + Variant::from(42i64) + ); + + // Row 2: top-level null + assert!(result.is_null(2)); + assert!(value.is_null(2)); + assert!(typed_value.is_null(2)); + + // Row 3: variant null falls back to value + assert!(result.is_valid(3)); + assert!(value.is_valid(3)); + assert!(typed_value.is_null(3)); + assert_eq!( + Variant::new(metadata.value(3), value.value(3)), + Variant::Null + ); + + // Row 4: string shreds to typed_value + assert!(result.is_valid(4)); + assert!(value.is_null(4)); + assert_eq!(typed_value.value(4), "world"); + } + + #[test] + fn test_largebinary_shredding() { + let input = VariantArray::from_iter(vec![ + Some(Variant::from(&b"\x00\x01\x02"[..])), + Some(Variant::from("not_binary")), + None, + Some(Variant::Null), + Some(Variant::from(&b"\xff\xaa"[..])), + ]); + + let result = shred_variant(&input, &DataType::LargeBinary).unwrap(); + let metadata = result.metadata_field(); + let value = result.value_field().unwrap(); + let typed_value = result + .typed_value_field() + .unwrap() + .as_any() + .downcast_ref::() + .unwrap(); + + assert_eq!(result.len(), 5); + + // Row 0: binary shreds to typed_value + assert!(result.is_valid(0)); + assert!(value.is_null(0)); + assert_eq!(typed_value.value(0), &[0x00, 0x01, 0x02]); + + // Row 1: string falls back to value + assert!(result.is_valid(1)); + assert!(value.is_valid(1)); + assert!(typed_value.is_null(1)); + assert_eq!( + Variant::new(metadata.value(1), value.value(1)), + Variant::from("not_binary") + ); + + // Row 2: top-level null + assert!(result.is_null(2)); + assert!(value.is_null(2)); + assert!(typed_value.is_null(2)); + + // Row 
3: variant null falls back to value + assert!(result.is_valid(3)); + assert!(value.is_valid(3)); + assert!(typed_value.is_null(3)); + assert_eq!( + Variant::new(metadata.value(3), value.value(3)), + Variant::Null + ); + + // Row 4: binary shreds to typed_value + assert!(result.is_valid(4)); + assert!(value.is_null(4)); + assert_eq!(typed_value.value(4), &[0xff, 0xaa]); + } + #[test] fn test_invalid_shredded_types_rejected() { let input = VariantArray::from_iter([Variant::from(42)]); @@ -1156,8 +1427,6 @@ mod tests { DataType::Time32(TimeUnit::Second), DataType::Time64(TimeUnit::Nanosecond), DataType::Timestamp(TimeUnit::Millisecond, None), - DataType::LargeBinary, - DataType::LargeUtf8, DataType::FixedSizeBinary(17), DataType::Union( UnionFields::from_fields(vec![ @@ -1226,13 +1495,7 @@ mod tests { 5, &[0, 3, 6, 6, 6, 6], &[Some(3), Some(3), None, None, Some(0)], - &[ - None, - None, - Some(Variant::from("not a list")), - Some(Variant::Null), - None, - ], + &[None, None, Some(Variant::from("not a list")), None, None], ( &[Some(1), Some(2), Some(3), Some(1), None, None], &[ @@ -1302,13 +1565,7 @@ mod tests { 5, &[0, 3, 6, 6, 6], &[Some(3), Some(3), None, None, Some(0)], - &[ - None, - None, - Some(Variant::from("not a list")), - Some(Variant::Null), - None, - ], + &[None, None, Some(Variant::from("not a list")), None, None], ( &[Some(1), Some(2), Some(3), Some(1), None, None], &[ @@ -1410,12 +1667,7 @@ mod tests { 4, &[0, 3, 6, 6, 6], &[Some(3), Some(3), None, None], - &[ - None, - None, - Some(Variant::from("not a list")), - Some(Variant::Null), - ], + &[None, None, Some(Variant::from("not a list")), None], ); let outer_elements = @@ -1503,7 +1755,7 @@ mod tests { 3, &[0, 2, 2, 2], &[Some(2), None, None], - &[None, Some(Variant::from("not a list")), Some(Variant::Null)], + &[None, Some(Variant::from("not a list")), None], ); // Validate nested struct fields for each element @@ -1989,13 +2241,7 @@ mod tests { scores_field.len(), &[0i32, 2, 4, 4, 4, 4], &[Some(2), Some(2), None, None, None], - &[ - None, - None, - Some(Variant::Null), - Some(Variant::Null), - Some(Variant::Null), - ], + &[None, None, None, None, None], ( &[Some(10), Some(20), None, None], &[None, None, Some(Variant::from("oops")), Some(Variant::Null)], diff --git a/parquet-variant-compute/src/type_conversion.rs b/parquet-variant-compute/src/type_conversion.rs index 6a0a743c9029..7b9eb67d1a95 100644 --- a/parquet-variant-compute/src/type_conversion.rs +++ b/parquet-variant-compute/src/type_conversion.rs @@ -17,11 +17,12 @@ //! Module for transforming a typed arrow `Array` to `VariantArray`. -use arrow::compute::{DecimalCast, rescale_decimal}; +use arrow::compute::{CastOptions, DecimalCast, rescale_decimal}; use arrow::datatypes::{ self, ArrowPrimitiveType, ArrowTimestampType, Decimal32Type, Decimal64Type, Decimal128Type, DecimalType, }; +use arrow::error::{ArrowError, Result}; use chrono::Timelike; use parquet_variant::{Variant, VariantDecimal4, VariantDecimal8, VariantDecimal16}; @@ -37,6 +38,27 @@ pub(crate) trait TimestampFromVariant: ArrowTimestampType { fn from_variant(variant: &Variant<'_, '_>) -> Option; } +/// Cast a single `Variant` value with safe/strict semantics. +/// +/// Returns `Ok(Some(_))` on successful conversion. +/// Returns `Ok(None)` when conversion fails in safe mode or the source value is `Variant::Null`. +/// Returns `Err(_)` when conversion fails in strict mode. 
+pub(crate) fn variant_cast_with_options<'a, 'm, 'v, T>( + variant: &'a Variant<'m, 'v>, + cast_options: &CastOptions<'_>, + cast: impl FnOnce(&'a Variant<'m, 'v>) -> Option<T>, +) -> Result<Option<T>> { + if let Some(value) = cast(variant) { + Ok(Some(value)) + } else if matches!(variant, Variant::Null) || cast_options.safe { + Ok(None) + } else { + Err(ArrowError::CastError(format!( + "Failed to cast variant value {variant:?}" + ))) + } +} + /// Macro to generate PrimitiveFromVariant implementations for Arrow primitive types macro_rules! impl_primitive_from_variant { ($arrow_type:ty, $variant_method:ident $(, $cast_fn:expr)?) => { @@ -94,7 +116,7 @@ impl_primitive_from_variant!(datatypes::Time32MillisecondType, as_time_utc, |v| } }); impl_primitive_from_variant!(datatypes::Time64MicrosecondType, as_time_utc, |v| { - Some((v.num_seconds_from_midnight() * 1_000_000 + v.nanosecond() / 1_000) as i64) + Some(v.num_seconds_from_midnight() as i64 * 1_000_000 + v.nanosecond() as i64 / 1_000) }); impl_primitive_from_variant!(datatypes::Time64NanosecondType, as_time_utc, |v| { // convert micro to nano seconds @@ -109,7 +131,7 @@ impl_timestamp_from_variant!( if timestamp.nanosecond() != 0 { None } else { - Self::make_value(timestamp) + Self::from_naive_datetime(timestamp, None) } } ); @@ -122,7 +144,7 @@ impl_timestamp_from_variant!( if timestamp.nanosecond() != 0 { None } else { - Self::make_value(timestamp.naive_utc()) + Self::from_naive_datetime(timestamp.naive_utc(), None) } } ); @@ -135,7 +157,7 @@ impl_timestamp_from_variant!( if timestamp.nanosecond() % 1_000_000 != 0 { None } else { - Self::make_value(timestamp) + Self::from_naive_datetime(timestamp, None) } } ); @@ -148,7 +170,7 @@ impl_timestamp_from_variant!( if timestamp.nanosecond() % 1_000_000 != 0 { None } else { - Self::make_value(timestamp.naive_utc()) + Self::from_naive_datetime(timestamp.naive_utc(), None) } } ); @@ -156,25 +178,25 @@ impl_timestamp_from_variant!( datatypes::TimestampMicrosecondType, as_timestamp_ntz_micros, ntz = true, - Self::make_value, + |timestamp| Self::from_naive_datetime(timestamp, None), ); impl_timestamp_from_variant!( datatypes::TimestampMicrosecondType, as_timestamp_micros, ntz = false, - |timestamp| Self::make_value(timestamp.naive_utc()) + |timestamp| Self::from_naive_datetime(timestamp.naive_utc(), None) ); impl_timestamp_from_variant!( datatypes::TimestampNanosecondType, as_timestamp_ntz_nanos, ntz = true, - Self::make_value + |timestamp| Self::from_naive_datetime(timestamp, None) ); impl_timestamp_from_variant!( datatypes::TimestampNanosecondType, as_timestamp_nanos, ntz = false, - |timestamp| Self::make_value(timestamp.naive_utc()) + |timestamp| Self::from_naive_datetime(timestamp.naive_utc(), None) ); /// Returns the unscaled integer representation for Arrow decimal type `O` diff --git a/parquet-variant-compute/src/unshred_variant.rs index 37363fd9d085..2df36fa63f02 100644 --- a/parquet-variant-compute/src/unshred_variant.rs +++ b/parquet-variant-compute/src/unshred_variant.rs @@ -17,11 +17,11 @@ //! Module for unshredding VariantArray by folding typed_value columns back into the value column.
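One detail worth flagging in the Time64MicrosecondType change above: chrono's Timelike::num_seconds_from_midnight() and nanosecond() both return u32, so the old expression multiplied in 32-bit arithmetic and only cast the already-overflowed result to i64. A tiny standalone demonstration of why each operand must be widened first (illustrative only, not part of this patch):

fn main() {
    let secs: u32 = 12 * 3600; // 12:00:00, as returned by num_seconds_from_midnight()
    let nanos: u32 = 500_000; // half a millisecond into the second

    // Old form, `(secs * 1_000_000 + nanos / 1_000) as i64`: the u32 multiply
    // overflows once secs exceeds u32::MAX / 1_000_000 (about 4294 seconds),
    // i.e. for any time of day past roughly 01:11:34. That is a panic in
    // debug builds and silent wraparound in release builds.
    assert!(secs.checked_mul(1_000_000).is_none());

    // Fixed form: widen to i64 before the arithmetic.
    let micros = secs as i64 * 1_000_000 + nanos as i64 / 1_000;
    assert_eq!(micros, 43_200_000_500);
}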
-use crate::arrow_to_variant::ListLikeArray; use crate::{BorrowedShreddingState, VariantArray, VariantValueArrayBuilder}; use arrow::array::{ - Array, AsArray as _, BinaryViewArray, BooleanArray, FixedSizeBinaryArray, FixedSizeListArray, - GenericListArray, GenericListViewArray, PrimitiveArray, StringArray, StructArray, + Array, AsArray as _, BinaryArray, BinaryViewArray, BooleanArray, FixedSizeBinaryArray, + FixedSizeListArray, GenericListArray, GenericListViewArray, LargeBinaryArray, LargeStringArray, + ListLikeArray, PrimitiveArray, StringArray, StringViewArray, StructArray, }; use arrow::buffer::NullBuffer; use arrow::datatypes::{ @@ -105,7 +105,11 @@ enum UnshredVariantRowBuilder<'a> { TimestampNanosecond(TimestampUnshredRowBuilder<'a, TimestampNanosecondType>), PrimitiveBoolean(UnshredPrimitiveRowBuilder<'a, BooleanArray>), PrimitiveString(UnshredPrimitiveRowBuilder<'a, StringArray>), + PrimitiveStringView(UnshredPrimitiveRowBuilder<'a, StringViewArray>), + PrimitiveLargeString(UnshredPrimitiveRowBuilder<'a, LargeStringArray>), + PrimitiveBinary(UnshredPrimitiveRowBuilder<'a, BinaryArray>), PrimitiveBinaryView(UnshredPrimitiveRowBuilder<'a, BinaryViewArray>), + PrimitiveLargeBinary(UnshredPrimitiveRowBuilder<'a, LargeBinaryArray>), PrimitiveUuid(UnshredPrimitiveRowBuilder<'a, FixedSizeBinaryArray>), List(ListUnshredVariantBuilder<'a, GenericListArray>), LargeList(ListUnshredVariantBuilder<'a, GenericListArray>), @@ -146,7 +150,11 @@ impl<'a> UnshredVariantRowBuilder<'a> { Self::TimestampNanosecond(b) => b.append_row(builder, metadata, index), Self::PrimitiveBoolean(b) => b.append_row(builder, metadata, index), Self::PrimitiveString(b) => b.append_row(builder, metadata, index), + Self::PrimitiveStringView(b) => b.append_row(builder, metadata, index), + Self::PrimitiveLargeString(b) => b.append_row(builder, metadata, index), + Self::PrimitiveBinary(b) => b.append_row(builder, metadata, index), Self::PrimitiveBinaryView(b) => b.append_row(builder, metadata, index), + Self::PrimitiveLargeBinary(b) => b.append_row(builder, metadata, index), Self::PrimitiveUuid(b) => b.append_row(builder, metadata, index), Self::List(b) => b.append_row(builder, metadata, index), Self::LargeList(b) => b.append_row(builder, metadata, index), @@ -226,7 +234,11 @@ impl<'a> UnshredVariantRowBuilder<'a> { } DataType::Boolean => primitive_builder!(PrimitiveBoolean, as_boolean), DataType::Utf8 => primitive_builder!(PrimitiveString, as_string), + DataType::Utf8View => primitive_builder!(PrimitiveStringView, as_string_view), + DataType::LargeUtf8 => primitive_builder!(PrimitiveLargeString, as_string), + DataType::Binary => primitive_builder!(PrimitiveBinary, as_binary), DataType::BinaryView => primitive_builder!(PrimitiveBinaryView, as_binary_view), + DataType::LargeBinary => primitive_builder!(PrimitiveLargeBinary, as_binary), DataType::FixedSizeBinary(16) => { primitive_builder!(PrimitiveUuid, as_fixed_size_binary) } @@ -405,7 +417,11 @@ macro_rules! 
impl_append_to_variant_builder { impl_append_to_variant_builder!(BooleanArray); impl_append_to_variant_builder!(StringArray); +impl_append_to_variant_builder!(StringViewArray); +impl_append_to_variant_builder!(LargeStringArray); +impl_append_to_variant_builder!(BinaryArray); impl_append_to_variant_builder!(BinaryViewArray); +impl_append_to_variant_builder!(LargeBinaryArray); impl_append_to_variant_builder!(PrimitiveArray); impl_append_to_variant_builder!(PrimitiveArray); impl_append_to_variant_builder!(PrimitiveArray); @@ -664,5 +680,98 @@ impl<'a, L: ListLikeArray> ListUnshredVariantBuilder<'a, L> { } } -// TODO: This code is covered by tests in `parquet/tests/variant_integration.rs`. Does that suffice? -// Or do we also need targeted stand-alone unit tests for full coverage? +#[cfg(test)] +mod tests { + use crate::VariantArray; + use arrow::array::{ + BinaryArray, BinaryViewArray, LargeBinaryArray, LargeStringArray, StringViewArray, + }; + use parquet_variant::Variant; + + #[test] + fn test_unshred_utf8view_typed_value() { + let metadata_bytes: &[u8] = &[0x01, 0x00, 0x00]; + let metadata = BinaryViewArray::from_iter_values(vec![metadata_bytes; 3]); + + let typed_value: arrow::array::ArrayRef = std::sync::Arc::new(StringViewArray::from(vec![ + Some("hello"), + Some("middle"), + Some("world"), + ])); + + let variant_array = VariantArray::from_parts(metadata, None, Some(typed_value), None); + + let result = crate::unshred_variant(&variant_array).unwrap(); + + assert_eq!(result.len(), 3); + assert_eq!(result.value(0), Variant::from("hello")); + assert_eq!(result.value(1), Variant::from("middle")); + assert_eq!(result.value(2), Variant::from("world")); + } + + #[test] + fn test_unshred_largeutf8_typed_value() { + let metadata_bytes: &[u8] = &[0x01, 0x00, 0x00]; + let metadata = BinaryViewArray::from_iter_values(vec![metadata_bytes; 3]); + + let typed_value: arrow::array::ArrayRef = + std::sync::Arc::new(LargeStringArray::from(vec![ + Some("hello"), + Some("middle"), + Some("world"), + ])); + + let variant_array = VariantArray::from_parts(metadata, None, Some(typed_value), None); + + let result = crate::unshred_variant(&variant_array).unwrap(); + + assert_eq!(result.len(), 3); + assert_eq!(result.value(0), Variant::from("hello")); + assert_eq!(result.value(1), Variant::from("middle")); + assert_eq!(result.value(2), Variant::from("world")); + } + + #[test] + fn test_unshred_binary_typed_value() { + let metadata_bytes: &[u8] = &[0x01, 0x00, 0x00]; + let metadata = BinaryViewArray::from_iter_values(vec![metadata_bytes; 3]); + + let typed_value: arrow::array::ArrayRef = + std::sync::Arc::new(BinaryArray::from_iter_values(vec![ + &b"\x00\x01\x02"[..], + &b"\xff\xaa"[..], + &b"\xde\xad\xbe\xef"[..], + ])); + + let variant_array = VariantArray::from_parts(metadata, None, Some(typed_value), None); + + let result = crate::unshred_variant(&variant_array).unwrap(); + + assert_eq!(result.len(), 3); + assert_eq!(result.value(0), Variant::from(&b"\x00\x01\x02"[..])); + assert_eq!(result.value(1), Variant::from(&b"\xff\xaa"[..])); + assert_eq!(result.value(2), Variant::from(&b"\xde\xad\xbe\xef"[..])); + } + + #[test] + fn test_unshred_largebinary_typed_value() { + let metadata_bytes: &[u8] = &[0x01, 0x00, 0x00]; + let metadata = BinaryViewArray::from_iter_values(vec![metadata_bytes; 3]); + + let typed_value: arrow::array::ArrayRef = + std::sync::Arc::new(LargeBinaryArray::from_iter_values(vec![ + &b"\x00\x01\x02"[..], + &b"\xff\xaa"[..], + &b"\xde\xad\xbe\xef"[..], + ])); + + let variant_array = 
VariantArray::from_parts(metadata, None, Some(typed_value), None); + + let result = crate::unshred_variant(&variant_array).unwrap(); + + assert_eq!(result.len(), 3); + assert_eq!(result.value(0), Variant::from(&b"\x00\x01\x02"[..])); + assert_eq!(result.value(1), Variant::from(&b"\xff\xaa"[..])); + assert_eq!(result.value(2), Variant::from(&b"\xde\xad\xbe\xef"[..])); + } +} diff --git a/parquet-variant-compute/src/variant_array.rs b/parquet-variant-compute/src/variant_array.rs index 250852d021bd..145de5edfb70 100644 --- a/parquet-variant-compute/src/variant_array.rs +++ b/parquet-variant-compute/src/variant_array.rs @@ -1181,16 +1181,23 @@ fn canonicalize_and_verify_data_type(data_type: &DataType) -> Result borrow!(), FixedSizeBinary(_) | FixedSizeList(..) => fail!(), - // We can _possibly_ allow (some of) these some day? - ListView(_) | LargeList(_) | LargeListView(_) => { - fail!() - } - - // Lists and struct are allowed, maps and unions are not + // List-like containers and struct are allowed, maps and unions are not List(field) => match canonicalize_and_verify_field(field)? { Cow::Borrowed(_) => borrow!(), Cow::Owned(new_field) => Cow::Owned(DataType::List(new_field)), }, + LargeList(field) => match canonicalize_and_verify_field(field)? { + Cow::Borrowed(_) => borrow!(), + Cow::Owned(new_field) => Cow::Owned(DataType::LargeList(new_field)), + }, + ListView(field) => match canonicalize_and_verify_field(field)? { + Cow::Borrowed(_) => borrow!(), + Cow::Owned(new_field) => Cow::Owned(DataType::ListView(new_field)), + }, + LargeListView(field) => match canonicalize_and_verify_field(field)? { + Cow::Borrowed(_) => borrow!(), + Cow::Owned(new_field) => Cow::Owned(DataType::LargeListView(new_field)), + }, // Struct is used by the internal layout, and can also represent a shredded variant object. Struct(fields) => { // Avoid allocation unless at least one field changes, to avoid unnecessary deep cloning @@ -1235,9 +1242,10 @@ mod test { use super::*; use arrow::array::{ - BinaryViewArray, Decimal32Array, Decimal64Array, Decimal128Array, Int32Array, - Time64MicrosecondArray, + BinaryViewArray, Decimal32Array, Decimal64Array, Decimal128Array, Int32Array, Int64Array, + LargeListArray, LargeListViewArray, ListArray, ListViewArray, Time64MicrosecondArray, }; + use arrow::buffer::{OffsetBuffer, ScalarBuffer}; use arrow_schema::{Field, Fields}; use parquet_variant::{EMPTY_VARIANT_METADATA_BYTES, ShortString}; @@ -1335,6 +1343,17 @@ mod test { Arc::new(Int32Array::from(vec![1])) } + fn make_variant_struct_with_typed_value(typed_value: ArrayRef) -> StructArray { + let metadata = BinaryViewArray::from_iter_values(std::iter::repeat_n( + EMPTY_VARIANT_METADATA_BYTES, + typed_value.len(), + )); + StructArrayBuilder::new() + .with_field("metadata", Arc::new(metadata), false) + .with_field("typed_value", typed_value, true) + .build() + } + #[test] fn all_null_shredding_state() { // Verify the shredding state is AllNull @@ -1420,6 +1439,81 @@ mod test { )); } + #[test] + fn canonicalize_and_verify_list_like_data_types() { + // `parquet/tests/variant_integration.rs` validates Parquet shredded-variant fixtures that + // use Parquet LIST encoding, but those fixtures do not cover Arrow-specific list container + // variants (`LargeList`, `ListView`, `LargeListView`) accepted by `VariantArray::try_new`. 
+ let make_item_binary = || Arc::new(Field::new("item", DataType::Binary, true)); + let make_item_binary_view = || Arc::new(Field::new("item", DataType::BinaryView, true)); + + let cases = vec![ + ( + DataType::LargeList(make_item_binary()), + DataType::LargeList(make_item_binary_view()), + ), + ( + DataType::ListView(make_item_binary()), + DataType::ListView(make_item_binary_view()), + ), + ( + DataType::LargeListView(make_item_binary()), + DataType::LargeListView(make_item_binary_view()), + ), + ]; + + for (input, expected) in cases { + assert_eq!( + canonicalize_and_verify_data_type(&input).unwrap().as_ref(), + &expected + ); + } + } + + #[test] + fn variant_array_try_new_supports_list_like_typed_value() { + let item_field = Arc::new(Field::new("item", DataType::Int64, true)); + let values: ArrayRef = Arc::new(Int64Array::from(vec![Some(1), None, Some(3)])); + + let typed_values = vec![ + Arc::new(ListArray::new( + item_field.clone(), + OffsetBuffer::new(ScalarBuffer::from(vec![0, 2, 3])), + values.clone(), + None, + )) as ArrayRef, + Arc::new(LargeListArray::new( + item_field.clone(), + OffsetBuffer::new(ScalarBuffer::from(vec![0_i64, 2, 3])), + values.clone(), + None, + )) as ArrayRef, + Arc::new(ListViewArray::new( + item_field.clone(), + ScalarBuffer::from(vec![0, 2]), + ScalarBuffer::from(vec![2, 1]), + values.clone(), + None, + )) as ArrayRef, + Arc::new(LargeListViewArray::new( + item_field, + ScalarBuffer::from(vec![0_i64, 2]), + ScalarBuffer::from(vec![2_i64, 1]), + values, + None, + )) as ArrayRef, + ]; + + for typed_value in typed_values { + let input = make_variant_struct_with_typed_value(typed_value.clone()); + let variant_array = VariantArray::try_new(&input).unwrap(); + assert_eq!( + variant_array.typed_value_field().unwrap().data_type(), + typed_value.data_type(), + ); + } + } + #[test] fn test_variant_array_iterable() { let mut b = VariantArrayBuilder::new(6); diff --git a/parquet-variant-compute/src/variant_get.rs b/parquet-variant-compute/src/variant_get.rs index f9985084cc49..73906f70eb77 100644 --- a/parquet-variant-compute/src/variant_get.rs +++ b/parquet-variant-compute/src/variant_get.rs @@ -213,9 +213,20 @@ fn shredded_get_path( return Ok(shredded); } - // Structs are special. Recurse into each field separately, hoping to follow the shredding even - // further, and build up the final struct from those individually shredded results. + // Structs are special. + // + // For fully unshredded targets (`typed_value` absent), delegate to the row builder so we + // preserve struct-level cast semantics: + // - safe mode: non-object rows become NULL structs + // - strict mode: non-object rows raise a cast error + // + // For shredded/partially-shredded targets (`typed_value` present), recurse into each field + // separately to take advantage of deeper shredding in child fields. 
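End to end, the two behaviors described in the comment above look like the following sketch, written in the style of this module's tests. It assumes the test module's in-scope items (GetOptions, variant_get, VariantPath, json_to_variant) and is illustrative rather than part of the patch:

use std::sync::Arc;
use arrow::array::{Array, ArrayRef, AsArray, StringArray};
use arrow::compute::CastOptions;
use arrow::datatypes::{DataType, Field, Fields};

#[test]
fn sketch_unshredded_struct_cast_modes() {
    let strings: ArrayRef = Arc::new(StringArray::from(vec![r#"{"a": 1}"#, "123"]));
    let variants = ArrayRef::from(json_to_variant(&strings).unwrap());

    let fields = Fields::from(vec![Field::new("a", DataType::Int32, true)]);
    let as_type = Arc::new(Field::new("result", DataType::Struct(fields), true));

    // Safe (default) mode: the non-object row "123" becomes a NULL struct row.
    let safe = GetOptions {
        path: VariantPath::default(),
        as_type: Some(as_type.clone()),
        cast_options: CastOptions::default(),
    };
    let result = variant_get(&variants, safe).unwrap();
    assert!(result.as_struct().is_null(1));

    // Strict mode: the same row raises a cast error instead.
    let strict = GetOptions {
        path: VariantPath::default(),
        as_type: Some(as_type),
        cast_options: CastOptions {
            safe: false,
            ..Default::default()
        },
    };
    assert!(variant_get(&variants, strict).is_err());
}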
if let DataType::Struct(fields) = as_field.data_type() { + if target.typed_value_field().is_none() { + return shred_basic_variant(target, VariantPath::default(), Some(as_field)); + } + let children = fields .iter() .map(|field| { @@ -334,7 +345,9 @@ mod test { use super::{GetOptions, variant_get}; use crate::variant_array::{ShreddedVariantFieldArray, StructArrayBuilder}; - use crate::{VariantArray, VariantArrayBuilder, json_to_variant}; + use crate::{ + VariantArray, VariantArrayBuilder, cast_to_variant, json_to_variant, shred_variant, + }; use arrow::array::{ Array, ArrayRef, AsArray, BinaryArray, BinaryViewArray, BooleanArray, Date32Array, Date64Array, Decimal32Array, Decimal64Array, Decimal128Array, Decimal256Array, @@ -463,49 +476,96 @@ mod test { }; } + /// Build a mixed input [typed, null, fallback, typed] and let shred_variant + /// generate the shredded fixture for the requested type. macro_rules! partially_shredded_variant_array_gen { ($func_name:ident, $typed_value_array_gen: expr) => { + partially_shredded_variant_array_gen!( + $func_name, + $typed_value_array_gen, + Variant::from("n/a") + ); + }; + ($func_name:ident, $typed_value_array_gen: expr, $fallback_variant:expr) => { fn $func_name() -> ArrayRef { - let (metadata, string_value) = { - let mut builder = parquet_variant::VariantBuilder::new(); - builder.append_value("n/a"); - builder.finish() - }; - - let nulls = NullBuffer::from(vec![ - true, // row 0 non null - false, // row 1 is null - true, // row 2 non null - true, // row 3 non null - ]); - - // metadata is the same for all rows - let metadata = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 4)); - - // See https://docs.google.com/document/d/1pw0AWoMQY3SjD7R4LgbPvMjG_xSCtXp3rZHkVp9jpZ4/edit?disco=AAABml8WQrY - // about why row1 is an empty but non null, value. - let values = BinaryViewArray::from(vec![ - None, // row 0 is shredded, so no value - Some(b"" as &[u8]), // row 1 is null, so empty value (why?) - Some(&string_value), // copy the string value "N/A" - None, // row 3 is shredded, so no value - ]); - - let typed_value = $typed_value_array_gen(); - - let struct_array = StructArrayBuilder::new() - .with_field("metadata", Arc::new(metadata), false) - .with_field("typed_value", Arc::new(typed_value), true) - .with_field("value", Arc::new(values), true) - .with_nulls(nulls) - .build(); - ArrayRef::from( - VariantArray::try_new(&struct_array).expect("should create variant array"), - ) + let typed_value: ArrayRef = Arc::new($typed_value_array_gen()); + let typed_as_variant = cast_to_variant(typed_value.as_ref()) + .expect("should cast typed array to variant"); + let mut input_builder = VariantArrayBuilder::new(typed_as_variant.len()); + input_builder.append_variant(typed_as_variant.value(0)); + input_builder.append_null(); + input_builder.append_variant($fallback_variant); + input_builder.append_variant(typed_as_variant.value(3)); + + let variant_array = shred_variant(&input_builder.build(), typed_value.data_type()) + .expect("should shred variant array"); + ArrayRef::from(variant_array) } }; } + // Fixture definitions grouped with the partially-shredded tests. + macro_rules! 
numeric_partially_shredded_variant_array_fn { + ($func:ident, $array_type:ident, $primitive_type:ty) => { + partially_shredded_variant_array_gen!($func, || $array_type::from(vec![ + Some(<$primitive_type>::try_from(34u8).unwrap()), + None, + None, + Some(<$primitive_type>::try_from(100u8).unwrap()), + ])); + }; + } + + numeric_partially_shredded_variant_array_fn!( + partially_shredded_int8_variant_array, + Int8Array, + i8 + ); + numeric_partially_shredded_variant_array_fn!( + partially_shredded_int16_variant_array, + Int16Array, + i16 + ); + numeric_partially_shredded_variant_array_fn!( + partially_shredded_int32_variant_array, + Int32Array, + i32 + ); + numeric_partially_shredded_variant_array_fn!( + partially_shredded_int64_variant_array, + Int64Array, + i64 + ); + numeric_partially_shredded_variant_array_fn!( + partially_shredded_float32_variant_array, + Float32Array, + f32 + ); + numeric_partially_shredded_variant_array_fn!( + partially_shredded_float64_variant_array, + Float64Array, + f64 + ); + + partially_shredded_variant_array_gen!(partially_shredded_bool_variant_array, || { + arrow::array::BooleanArray::from(vec![Some(true), None, None, Some(false)]) + }); + + partially_shredded_variant_array_gen!( + partially_shredded_utf8_variant_array, + || { StringArray::from(vec![Some("hello"), None, None, Some("world")]) }, + Variant::from(42i32) + ); + + partially_shredded_variant_array_gen!(partially_shredded_date32_variant_array, || { + Date32Array::from(vec![ + Some(20348), // 2025-09-17 + None, + None, + Some(20340), // 2025-09-09 + ]) + }); + #[test] fn get_variant_partially_shredded_int8_as_variant() { numeric_partially_shredded_test!(i8, partially_shredded_int8_variant_array); @@ -566,7 +626,7 @@ mod test { // Expect the values are the same as the original values assert_eq!(result.value(0), Variant::from("hello")); assert!(!result.is_valid(1)); - assert_eq!(result.value(2), Variant::from("n/a")); + assert_eq!(result.value(2), Variant::from(42i32)); assert_eq!(result.value(3), Variant::from("world")); } @@ -616,6 +676,153 @@ mod test { assert_eq!(result.value(3), Variant::from(&[4u8, 5u8, 6u8][..])); } + // Timestamp partially-shredded tests grouped with the other partially-shredded cases. + macro_rules! 
assert_variant_get_as_variant_array_with_default_option { + ($variant_array: expr, $array_expected: expr) => {{ + let options = GetOptions::new(); + let array = $variant_array; + let result = variant_get(&array, options).unwrap(); + let result = VariantArray::try_new(&result).unwrap(); + + assert_eq!(result.len(), $array_expected.len()); + + for (idx, item) in $array_expected.into_iter().enumerate() { + match item { + Some(item) => assert_eq!(result.value(idx), item), + None => assert!(result.is_null(idx)), + } + } + }}; + } + + partially_shredded_variant_array_gen!( + partially_shredded_timestamp_micro_ntz_variant_array, + || { + arrow::array::TimestampMicrosecondArray::from(vec![ + Some(-456000), + None, + None, + Some(1758602096000000), + ]) + } + ); + + #[test] + fn get_variant_partial_shredded_timestamp_micro_ntz_as_variant() { + let array = partially_shredded_timestamp_micro_ntz_variant_array(); + assert_variant_get_as_variant_array_with_default_option!( + array, + vec![ + Some(Variant::from( + DateTime::from_timestamp_micros(-456000i64) + .unwrap() + .naive_utc(), + )), + None, + Some(Variant::from("n/a")), + Some(Variant::from( + DateTime::parse_from_rfc3339("2025-09-23T12:34:56+08:00") + .unwrap() + .naive_utc(), + )), + ] + ) + } + + partially_shredded_variant_array_gen!(partially_shredded_timestamp_micro_variant_array, || { + arrow::array::TimestampMicrosecondArray::from(vec![ + Some(-456000), + None, + None, + Some(1758602096000000), + ]) + .with_timezone("+00:00") + }); + + #[test] + fn get_variant_partial_shredded_timestamp_micro_as_variant() { + let array = partially_shredded_timestamp_micro_variant_array(); + assert_variant_get_as_variant_array_with_default_option!( + array, + vec![ + Some(Variant::from( + DateTime::from_timestamp_micros(-456000i64) + .unwrap() + .to_utc(), + )), + None, + Some(Variant::from("n/a")), + Some(Variant::from( + DateTime::parse_from_rfc3339("2025-09-23T12:34:56+08:00") + .unwrap() + .to_utc(), + )), + ] + ) + } + + partially_shredded_variant_array_gen!( + partially_shredded_timestamp_nano_ntz_variant_array, + || { + arrow::array::TimestampNanosecondArray::from(vec![ + Some(-4999999561), + None, + None, + Some(1758602096000000000), + ]) + } + ); + + #[test] + fn get_variant_partial_shredded_timestamp_nano_ntz_as_variant() { + let array = partially_shredded_timestamp_nano_ntz_variant_array(); + assert_variant_get_as_variant_array_with_default_option!( + array, + vec![ + Some(Variant::from( + DateTime::from_timestamp(-5, 439).unwrap().naive_utc() + )), + None, + Some(Variant::from("n/a")), + Some(Variant::from( + DateTime::parse_from_rfc3339("2025-09-23T12:34:56+08:00") + .unwrap() + .naive_utc() + )), + ] + ) + } + + partially_shredded_variant_array_gen!(partially_shredded_timestamp_nano_variant_array, || { + arrow::array::TimestampNanosecondArray::from(vec![ + Some(-4999999561), + None, + None, + Some(1758602096000000000), + ]) + .with_timezone("+00:00") + }); + + #[test] + fn get_variant_partial_shredded_timestamp_nano_as_variant() { + let array = partially_shredded_timestamp_nano_variant_array(); + assert_variant_get_as_variant_array_with_default_option!( + array, + vec![ + Some(Variant::from( + DateTime::from_timestamp(-5, 439).unwrap().to_utc() + )), + None, + Some(Variant::from("n/a")), + Some(Variant::from( + DateTime::parse_from_rfc3339("2025-09-23T12:34:56+08:00") + .unwrap() + .to_utc() + )), + ] + ) + } + /// Shredding: extract a value as an Int32Array #[test] fn get_variant_shredded_int32_as_int32_safe_cast() { @@ -834,22 +1041,21 @@ 
mod test { macro_rules! perfectly_shredded_variant_array_fn { ($func:ident, $typed_value_gen:expr) => { fn $func() -> ArrayRef { - // At the time of writing, the `VariantArrayBuilder` does not support shredding. - // so we must construct the array manually. see https://github.com/apache/arrow-rs/issues/7895 + // Prefer producing fixtures with shred_variant from unshredded input. + // Fall back for remaining non-shreddable test-only Arrow types (currently Null). + let typed_value: ArrayRef = Arc::new($typed_value_gen()); + if let Some(shredded) = cast_to_variant(typed_value.as_ref()) + .ok() + .and_then(|unshredded| shred_variant(&unshredded, typed_value.data_type()).ok()) + { + return shredded.into(); + } + let metadata = BinaryViewArray::from_iter_values(std::iter::repeat_n( EMPTY_VARIANT_METADATA_BYTES, - 3, + typed_value.len(), )); - let typed_value = $typed_value_gen(); - - let struct_array = StructArrayBuilder::new() - .with_field("metadata", Arc::new(metadata), false) - .with_field("typed_value", Arc::new(typed_value), true) - .build(); - - VariantArray::try_new(&struct_array) - .expect("should create variant array") - .into() + VariantArray::from_parts(metadata, None, Some(typed_value), None).into() } }; } @@ -1426,162 +1632,12 @@ mod test { perfectly_shredded_decimal16_variant_array, Decimal128Array::from(vec![ Some(i128::from_str("12345678901234567899").unwrap()), - Some(i128::from_str("23445677483748324300").unwrap()), - Some(i128::from_str("-12345678901234567899").unwrap()) - ]) - .with_precision_and_scale(20, 3) - .unwrap() - ); - - macro_rules! assert_variant_get_as_variant_array_with_default_option { - ($variant_array: expr, $array_expected: expr) => {{ - let options = GetOptions::new(); - let array = $variant_array; - let result = variant_get(&array, options).unwrap(); - - // expect the result is a VariantArray - let result = VariantArray::try_new(&result).unwrap(); - - assert_eq!(result.len(), $array_expected.len()); - - for (idx, item) in $array_expected.into_iter().enumerate() { - match item { - Some(item) => assert_eq!(result.value(idx), item), - None => assert!(result.is_null(idx)), - } - } - }}; - } - - partially_shredded_variant_array_gen!( - partially_shredded_timestamp_micro_ntz_variant_array, - || { - arrow::array::TimestampMicrosecondArray::from(vec![ - Some(-456000), - None, - None, - Some(1758602096000000), - ]) - } - ); - - #[test] - fn get_variant_partial_shredded_timestamp_micro_ntz_as_variant() { - let array = partially_shredded_timestamp_micro_ntz_variant_array(); - assert_variant_get_as_variant_array_with_default_option!( - array, - vec![ - Some(Variant::from( - DateTime::from_timestamp_micros(-456000i64) - .unwrap() - .naive_utc(), - )), - None, - Some(Variant::from("n/a")), - Some(Variant::from( - DateTime::parse_from_rfc3339("2025-09-23T12:34:56+08:00") - .unwrap() - .naive_utc(), - )), - ] - ) - } - - partially_shredded_variant_array_gen!(partially_shredded_timestamp_micro_variant_array, || { - arrow::array::TimestampMicrosecondArray::from(vec![ - Some(-456000), - None, - None, - Some(1758602096000000), - ]) - .with_timezone("+00:00") - }); - - #[test] - fn get_variant_partial_shredded_timestamp_micro_as_variant() { - let array = partially_shredded_timestamp_micro_variant_array(); - assert_variant_get_as_variant_array_with_default_option!( - array, - vec![ - Some(Variant::from( - DateTime::from_timestamp_micros(-456000i64) - .unwrap() - .to_utc(), - )), - None, - Some(Variant::from("n/a")), - Some(Variant::from( - 
DateTime::parse_from_rfc3339("2025-09-23T12:34:56+08:00") - .unwrap() - .to_utc(), - )), - ] - ) - } - - partially_shredded_variant_array_gen!( - partially_shredded_timestamp_nano_ntz_variant_array, - || { - arrow::array::TimestampNanosecondArray::from(vec![ - Some(-4999999561), - None, - None, - Some(1758602096000000000), - ]) - } - ); - - #[test] - fn get_variant_partial_shredded_timestamp_nano_ntz_as_variant() { - let array = partially_shredded_timestamp_nano_ntz_variant_array(); - - assert_variant_get_as_variant_array_with_default_option!( - array, - vec![ - Some(Variant::from( - DateTime::from_timestamp(-5, 439).unwrap().naive_utc() - )), - None, - Some(Variant::from("n/a")), - Some(Variant::from( - DateTime::parse_from_rfc3339("2025-09-23T12:34:56+08:00") - .unwrap() - .naive_utc() - )), - ] - ) - } - - partially_shredded_variant_array_gen!(partially_shredded_timestamp_nano_variant_array, || { - arrow::array::TimestampNanosecondArray::from(vec![ - Some(-4999999561), - None, - None, - Some(1758602096000000000), - ]) - .with_timezone("+00:00") - }); - - #[test] - fn get_variant_partial_shredded_timestamp_nano_as_variant() { - let array = partially_shredded_timestamp_nano_variant_array(); - - assert_variant_get_as_variant_array_with_default_option!( - array, - vec![ - Some(Variant::from( - DateTime::from_timestamp(-5, 439).unwrap().to_utc() - )), - None, - Some(Variant::from("n/a")), - Some(Variant::from( - DateTime::parse_from_rfc3339("2025-09-23T12:34:56+08:00") - .unwrap() - .to_utc() - )), - ] - ) - } + Some(i128::from_str("23445677483748324300").unwrap()), + Some(i128::from_str("-12345678901234567899").unwrap()) + ]) + .with_precision_and_scale(20, 3) + .unwrap() + ); perfectly_shredded_variant_array_fn!(perfectly_shredded_binary_variant_array, || { BinaryArray::from(vec![ @@ -1640,144 +1696,6 @@ mod test { ]) ); - /// Return a VariantArray that represents a normal "shredded" variant - /// for the following example - /// - /// Based on the example from [the doc] - /// - /// [the doc]: https://docs.google.com/document/d/1pw0AWoMQY3SjD7R4LgbPvMjG_xSCtXp3rZHkVp9jpZ4/edit?tab=t.0 - /// - /// ```text - /// 34 - /// null (an Arrow NULL, not a Variant::Null) - /// "n/a" (a string) - /// 100 - /// ``` - /// - /// The schema of the corresponding `StructArray` would look like this: - /// - /// ```text - /// StructArray { - /// metadata: BinaryViewArray, - /// value: BinaryViewArray, - /// typed_value: Int32Array, - /// } - /// ``` - macro_rules! numeric_partially_shredded_variant_array_fn { - ($func:ident, $array_type:ident, $primitive_type:ty) => { - partially_shredded_variant_array_gen!($func, || $array_type::from(vec![ - Some(<$primitive_type>::try_from(34u8).unwrap()), // row 0 is shredded, so it has a value - None, // row 1 is null, so no value - None, // row 2 is a string, so no typed value - Some(<$primitive_type>::try_from(100u8).unwrap()), // row 3 is shredded, so it has a value - ])); - }; - } - - macro_rules! partially_shredded_variant_array_gen { - ($func:ident, $typed_array_gen: expr) => { - fn $func() -> ArrayRef { - // At the time of writing, the `VariantArrayBuilder` does not support shredding. - // so we must construct the array manually. 
see https://github.com/apache/arrow-rs/issues/7895 - let (metadata, string_value) = { - let mut builder = parquet_variant::VariantBuilder::new(); - builder.append_value("n/a"); - builder.finish() - }; - - let nulls = NullBuffer::from(vec![ - true, // row 0 non null - false, // row 1 is null - true, // row 2 non null - true, // row 3 non null - ]); - - // metadata is the same for all rows - let metadata = BinaryViewArray::from_iter_values(std::iter::repeat_n(&metadata, 4)); - - // See https://docs.google.com/document/d/1pw0AWoMQY3SjD7R4LgbPvMjG_xSCtXp3rZHkVp9jpZ4/edit?disco=AAABml8WQrY - // about why row1 is an empty but non null, value. - let values = BinaryViewArray::from(vec![ - None, // row 0 is shredded, so no value - Some(b"" as &[u8]), // row 1 is null, so empty value (why?) - Some(&string_value), // copy the string value "N/A" - None, // row 3 is shredded, so no value - ]); - - let typed_value = $typed_array_gen(); - - let struct_array = StructArrayBuilder::new() - .with_field("metadata", Arc::new(metadata), false) - .with_field("typed_value", Arc::new(typed_value), true) - .with_field("value", Arc::new(values), true) - .with_nulls(nulls) - .build(); - - ArrayRef::from( - VariantArray::try_new(&struct_array).expect("should create variant array"), - ) - } - }; - } - - numeric_partially_shredded_variant_array_fn!( - partially_shredded_int8_variant_array, - Int8Array, - i8 - ); - numeric_partially_shredded_variant_array_fn!( - partially_shredded_int16_variant_array, - Int16Array, - i16 - ); - numeric_partially_shredded_variant_array_fn!( - partially_shredded_int32_variant_array, - Int32Array, - i32 - ); - numeric_partially_shredded_variant_array_fn!( - partially_shredded_int64_variant_array, - Int64Array, - i64 - ); - numeric_partially_shredded_variant_array_fn!( - partially_shredded_float32_variant_array, - Float32Array, - f32 - ); - numeric_partially_shredded_variant_array_fn!( - partially_shredded_float64_variant_array, - Float64Array, - f64 - ); - - partially_shredded_variant_array_gen!(partially_shredded_bool_variant_array, || { - arrow::array::BooleanArray::from(vec![ - Some(true), // row 0 is shredded, so it has a value - None, // row 1 is null, so no value - None, // row 2 is a string, so no typed value - Some(false), // row 3 is shredded, so it has a value - ]) - }); - - partially_shredded_variant_array_gen!(partially_shredded_utf8_variant_array, || { - StringArray::from(vec![ - Some("hello"), // row 0 is shredded - None, // row 1 is null - None, // row 2 is a string - Some("world"), // row 3 is shredded - ]) - }); - - partially_shredded_variant_array_gen!(partially_shredded_date32_variant_array, || { - Date32Array::from(vec![ - Some(20348), // row 0 is shredded, 2025-09-17 - None, // row 1 is null - None, // row 2 is a string, not a date - Some(20340), // row 3 is shredded, 2025-09-09 - ]) - }); - /// Return a VariantArray that represents an "all null" variant /// for the following example (3 null values): /// @@ -1805,12 +1723,7 @@ mod test { let metadata = BinaryViewArray::from_iter_values(std::iter::repeat_n(EMPTY_VARIANT_METADATA_BYTES, 3)); - let struct_array = StructArrayBuilder::new() - .with_field("metadata", Arc::new(metadata), false) - .with_nulls(nulls) - .build(); - - Arc::new(struct_array) + ArrayRef::from(VariantArray::from_parts(metadata, None, None, Some(nulls))) } /// This test manually constructs a shredded variant array representing objects /// like {"x": 1, "y": "foo"} and {"x": 42} and tests extracting the "x" field @@ -1895,13 +1808,11 @@ mod test { let 
x_field_typed_value = Int32Array::from(vec![Some(1), Some(42)]); // For perfect shredding of the x field, no "value" column, only typed_value - let x_field_struct = StructArrayBuilder::new() - .with_field("typed_value", Arc::new(x_field_typed_value), true) - .build(); - - // Wrap the x field struct in a ShreddedVariantFieldArray - let x_field_shredded = ShreddedVariantFieldArray::try_new(&x_field_struct) - .expect("should create ShreddedVariantFieldArray"); + let x_field_shredded = ShreddedVariantFieldArray::from_parts( + None, + Some(Arc::new(x_field_typed_value) as ArrayRef), + None, + ); // Create the main typed_value as a struct containing the "x" field let typed_value_fields = Fields::from(vec![Field::new( @@ -1917,13 +1828,12 @@ mod test { .unwrap(); // Create the main VariantArray - let main_struct = StructArrayBuilder::new() - .with_field("metadata", Arc::new(metadata_array), false) - .with_field("value", Arc::new(value_array), true) - .with_field("typed_value", Arc::new(typed_value_struct), true) - .build(); - - Arc::new(main_struct) + ArrayRef::from(VariantArray::from_parts( + metadata_array, + Some(value_array), + Some(Arc::new(typed_value_struct)), + None, + )) } /// Simple test to check if nested paths are supported by current implementation @@ -2275,12 +2185,11 @@ mod test { let x_field_typed_value = Int32Array::from(vec![Some(42), None]); // For the x field, only typed_value (perfect shredding when possible) - let x_field_struct = StructArrayBuilder::new() - .with_field("typed_value", Arc::new(x_field_typed_value), true) - .build(); - - let x_field_shredded = ShreddedVariantFieldArray::try_new(&x_field_struct) - .expect("should create ShreddedVariantFieldArray"); + let x_field_shredded = ShreddedVariantFieldArray::from_parts( + None, + Some(Arc::new(x_field_typed_value) as ArrayRef), + None, + ); // Create the main typed_value as a struct containing the "x" field let typed_value_fields = Fields::from(vec![Field::new( @@ -2296,13 +2205,12 @@ mod test { .unwrap(); // Build final VariantArray - let struct_array = StructArrayBuilder::new() - .with_field("metadata", Arc::new(metadata_array), false) - .with_field("value", Arc::new(value_array), true) - .with_field("typed_value", Arc::new(typed_value_struct), true) - .build(); - - Arc::new(struct_array) + ArrayRef::from(VariantArray::from_parts( + metadata_array, + Some(value_array), + Some(Arc::new(typed_value_struct)), + None, + )) } /// Create working depth 1 shredded test data based on the existing working pattern @@ -2357,11 +2265,11 @@ mod test { // Create the nested shredded structure // Level 2: x field (the deepest level) let x_typed_value = Int32Array::from(vec![Some(55), None]); - let x_field_struct = StructArrayBuilder::new() - .with_field("typed_value", Arc::new(x_typed_value), true) - .build(); - let x_field_shredded = ShreddedVariantFieldArray::try_new(&x_field_struct) - .expect("should create ShreddedVariantFieldArray for x"); + let x_field_shredded = ShreddedVariantFieldArray::from_parts( + None, + Some(Arc::new(x_typed_value) as ArrayRef), + None, + ); // Level 1: a field containing x field + value field for fallbacks // The "a" field needs both typed_value (for shredded x) and value (for fallback cases) @@ -2384,23 +2292,15 @@ mod test { x_field_shredded.data_type().clone(), true, )]); - let a_inner_struct = StructArrayBuilder::new() - .with_field( - "typed_value", - Arc::new( - StructArray::try_new( - a_inner_fields, - vec![ArrayRef::from(x_field_shredded)], - None, - ) - .unwrap(), - ), - true, - ) - 
.with_field("value", Arc::new(a_value_array), true) - .build(); - let a_field_shredded = ShreddedVariantFieldArray::try_new(&a_inner_struct) - .expect("should create ShreddedVariantFieldArray for a"); + let a_inner_typed_value = Arc::new( + StructArray::try_new(a_inner_fields, vec![ArrayRef::from(x_field_shredded)], None) + .unwrap(), + ) as ArrayRef; + let a_field_shredded = ShreddedVariantFieldArray::from_parts( + Some(a_value_array), + Some(a_inner_typed_value), + None, + ); // Level 0: main typed_value struct containing a field let typed_value_fields = Fields::from(vec![Field::new( @@ -2416,13 +2316,12 @@ mod test { .unwrap(); // Build final VariantArray - let struct_array = StructArrayBuilder::new() - .with_field("metadata", Arc::new(metadata_array), false) - .with_field("value", Arc::new(value_array), true) - .with_field("typed_value", Arc::new(typed_value_struct), true) - .build(); - - Arc::new(struct_array) + ArrayRef::from(VariantArray::from_parts( + metadata_array, + Some(value_array), + Some(Arc::new(typed_value_struct)), + None, + )) } /// Create working depth 2 shredded test data for "a.b.x" paths @@ -2470,11 +2369,11 @@ mod test { // Level 3: x field (deepest level) let x_typed_value = Int32Array::from(vec![Some(100), None, None]); - let x_field_struct = StructArrayBuilder::new() - .with_field("typed_value", Arc::new(x_typed_value), true) - .build(); - let x_field_shredded = ShreddedVariantFieldArray::try_new(&x_field_struct) - .expect("should create ShreddedVariantFieldArray for x"); + let x_field_shredded = ShreddedVariantFieldArray::from_parts( + None, + Some(Arc::new(x_typed_value) as ArrayRef), + None, + ); // Level 2: b field containing x field + value field let b_value_data = { @@ -2495,23 +2394,15 @@ mod test { x_field_shredded.data_type().clone(), true, )]); - let b_inner_struct = StructArrayBuilder::new() - .with_field( - "typed_value", - Arc::new( - StructArray::try_new( - b_inner_fields, - vec![ArrayRef::from(x_field_shredded)], - None, - ) - .unwrap(), - ), - true, - ) - .with_field("value", Arc::new(b_value_array), true) - .build(); - let b_field_shredded = ShreddedVariantFieldArray::try_new(&b_inner_struct) - .expect("should create ShreddedVariantFieldArray for b"); + let b_inner_typed_value = Arc::new( + StructArray::try_new(b_inner_fields, vec![ArrayRef::from(x_field_shredded)], None) + .unwrap(), + ) as ArrayRef; + let b_field_shredded = ShreddedVariantFieldArray::from_parts( + Some(b_value_array), + Some(b_inner_typed_value), + None, + ); // Level 1: a field containing b field + value field let a_value_data = { @@ -2532,23 +2423,15 @@ mod test { b_field_shredded.data_type().clone(), true, )]); - let a_inner_struct = StructArrayBuilder::new() - .with_field( - "typed_value", - Arc::new( - StructArray::try_new( - a_inner_fields, - vec![ArrayRef::from(b_field_shredded)], - None, - ) - .unwrap(), - ), - true, - ) - .with_field("value", Arc::new(a_value_array), true) - .build(); - let a_field_shredded = ShreddedVariantFieldArray::try_new(&a_inner_struct) - .expect("should create ShreddedVariantFieldArray for a"); + let a_inner_typed_value = Arc::new( + StructArray::try_new(a_inner_fields, vec![ArrayRef::from(b_field_shredded)], None) + .unwrap(), + ) as ArrayRef; + let a_field_shredded = ShreddedVariantFieldArray::from_parts( + Some(a_value_array), + Some(a_inner_typed_value), + None, + ); // Level 0: main typed_value struct containing a field let typed_value_fields = Fields::from(vec![Field::new( @@ -2564,13 +2447,12 @@ mod test { .unwrap(); // Build final 
VariantArray
-        let struct_array = StructArrayBuilder::new()
-            .with_field("metadata", Arc::new(metadata_array), false)
-            .with_field("value", Arc::new(value_array), true)
-            .with_field("typed_value", Arc::new(typed_value_struct), true)
-            .build();
-
-        Arc::new(struct_array)
+        ArrayRef::from(VariantArray::from_parts(
+            metadata_array,
+            Some(value_array),
+            Some(Arc::new(typed_value_struct)),
+            None,
+        ))
    }

    #[test]
@@ -3199,10 +3081,8 @@ mod test {
        assert!(struct_result.is_null(3));
    }

-    /// Test that demonstrates the actual struct row builder gap
-    /// This test should fail because it hits unshredded nested structs
    #[test]
-    fn test_struct_row_builder_gap_demonstration() {
+    fn test_struct_row_builder_handles_unshredded_nested_structs() {
        // Create completely unshredded JSON variant (no typed_value at all)
        let json_strings = vec![
            r#"{"outer": {"inner": 42}}"#,
            r#"{"outer": {"inner": 100}}"#,
        ];
        let string_array: Arc<dyn Array> = Arc::new(StringArray::from(json_strings));
        let variant_array = json_to_variant(&string_array).unwrap();

-        // Request nested struct - this should fail at the row builder level
+        // Request nested struct
        let inner_fields = Fields::from(vec![Field::new("inner", DataType::Int32, true)]);
        let inner_struct_type = DataType::Struct(inner_fields);
        let outer_fields = Fields::from(vec![Field::new("outer", inner_struct_type, true)]);
@@ -3224,12 +3104,97 @@ mod test {
        };

        let variant_array_ref = ArrayRef::from(variant_array);
-        let result = variant_get(&variant_array_ref, options);
+        let result = variant_get(&variant_array_ref, options).unwrap();

-        // Should fail with NotYetImplemented when the row builder tries to handle struct type
-        assert!(result.is_err());
-        let error = result.unwrap_err();
-        assert!(error.to_string().contains("Not yet implemented"));
+        let outer_struct = result.as_struct();
+        assert_eq!(outer_struct.len(), 2);
+        assert_eq!(outer_struct.num_columns(), 1);
+
+        let inner_struct = outer_struct.column(0).as_struct();
+        assert_eq!(inner_struct.num_columns(), 1);
+
+        let inner_values = inner_struct
+            .column(0)
+            .as_any()
+            .downcast_ref::<Int32Array>()
+            .unwrap();
+        assert_eq!(inner_values.value(0), 42);
+        assert_eq!(inner_values.value(1), 100);
+    }
+
+    #[test]
+    fn test_unshredded_struct_safe_cast_non_object_rows_are_null() {
+        let json_strings = vec![r#"{"a": 1, "b": 2}"#, "123", "{}"];
+        let string_array: Arc<dyn Array> = Arc::new(StringArray::from(json_strings));
+        let variant_array_ref = ArrayRef::from(json_to_variant(&string_array).unwrap());
+
+        let struct_fields = Fields::from(vec![
+            Field::new("a", DataType::Int32, true),
+            Field::new("b", DataType::Int32, true),
+        ]);
+        let options = GetOptions {
+            path: VariantPath::default(),
+            as_type: Some(Arc::new(Field::new(
+                "result",
+                DataType::Struct(struct_fields),
+                true,
+            ))),
+            cast_options: CastOptions::default(),
+        };
+
+        let result = variant_get(&variant_array_ref, options).unwrap();
+        let struct_result = result.as_struct();
+        let field_a = struct_result
+            .column(0)
+            .as_primitive::<Int32Type>();
+        let field_b = struct_result
+            .column(1)
+            .as_primitive::<Int32Type>();
+
+        // Row 0 is an object, so the struct row is valid with extracted fields.
+        assert!(!struct_result.is_null(0));
+        assert_eq!(field_a.value(0), 1);
+        assert_eq!(field_b.value(0), 2);
+
+        // Row 1 is a scalar, so safe struct cast should produce a NULL struct row.
+        assert!(struct_result.is_null(1));
+        assert!(field_a.is_null(1));
+        assert!(field_b.is_null(1));
+
+        // Row 2 is an empty object, so the struct row is valid with missing fields as NULL.
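+        // (Safe-cast semantics, as exercised above: a non-object row nulls the
+        // entire struct row, while a field merely missing from an object nulls
+        // only that field's column.)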
+        assert!(!struct_result.is_null(2));
+        assert!(field_a.is_null(2));
+        assert!(field_b.is_null(2));
+    }
+
+    #[test]
+    fn test_unshredded_struct_strict_cast_non_object_errors() {
+        let json_strings = vec![r#"{"a": 1, "b": 2}"#, "123"];
+        let string_array: Arc<dyn Array> = Arc::new(StringArray::from(json_strings));
+        let variant_array_ref = ArrayRef::from(json_to_variant(&string_array).unwrap());
+
+        let struct_fields = Fields::from(vec![
+            Field::new("a", DataType::Int32, true),
+            Field::new("b", DataType::Int32, true),
+        ]);
+        let options = GetOptions {
+            path: VariantPath::default(),
+            as_type: Some(Arc::new(Field::new(
+                "result",
+                DataType::Struct(struct_fields),
+                true,
+            ))),
+            cast_options: CastOptions {
+                safe: false,
+                ..Default::default()
+            },
+        };
+
+        let err = variant_get(&variant_array_ref, options).unwrap_err();
+        assert!(
+            err.to_string()
+                .contains("Failed to extract struct from variant")
+        );
    }

    /// Create comprehensive shredded variant with diverse null patterns and empty objects
@@ -3256,27 +3221,27 @@ mod test {
        // Create shredded fields with different null patterns
        // Field "a": present in rows 0,3 (missing in rows 1,2,4)
        let a_field_typed_value = Int32Array::from(vec![Some(1), None, None, Some(1), None]);
-        let a_field_struct = StructArrayBuilder::new()
-            .with_field("typed_value", Arc::new(a_field_typed_value), true)
-            .build();
-        let a_field_shredded = ShreddedVariantFieldArray::try_new(&a_field_struct)
-            .expect("should create ShreddedVariantFieldArray for a");
+        let a_field_shredded = ShreddedVariantFieldArray::from_parts(
+            None,
+            Some(Arc::new(a_field_typed_value) as ArrayRef),
+            None,
+        );

        // Field "b": present in rows 0,2 (missing in rows 1,3,4)
        let b_field_typed_value = Int32Array::from(vec![Some(2), None, Some(2), None, None]);
-        let b_field_struct = StructArrayBuilder::new()
-            .with_field("typed_value", Arc::new(b_field_typed_value), true)
-            .build();
-        let b_field_shredded = ShreddedVariantFieldArray::try_new(&b_field_struct)
-            .expect("should create ShreddedVariantFieldArray for b");
+        let b_field_shredded = ShreddedVariantFieldArray::from_parts(
+            None,
+            Some(Arc::new(b_field_typed_value) as ArrayRef),
+            None,
+        );

        // Field "c": present in row 0 only (missing in all other rows)
        let c_field_typed_value = Int32Array::from(vec![Some(3), None, None, None, None]);
-        let c_field_struct = StructArrayBuilder::new()
-            .with_field("typed_value", Arc::new(c_field_typed_value), true)
-            .build();
-        let c_field_shredded = ShreddedVariantFieldArray::try_new(&c_field_struct)
-            .expect("should create ShreddedVariantFieldArray for c");
+        let c_field_shredded = ShreddedVariantFieldArray::from_parts(
+            None,
+            Some(Arc::new(c_field_typed_value) as ArrayRef),
+            None,
+        );

        // Create main typed_value struct
        let typed_value_fields = Fields::from(vec![
@@ -3296,13 +3261,12 @@ mod test {
            .unwrap();

        // Build final VariantArray with top-level nulls
-        let struct_array = StructArrayBuilder::new()
-            .with_field("metadata", Arc::new(metadata_array), false)
-            .with_field("typed_value", Arc::new(typed_value_struct), true)
-            .with_nulls(nulls)
-            .build();
-
-        Arc::new(struct_array)
+        ArrayRef::from(VariantArray::from_parts(
+            metadata_array,
+            None,
+            Some(Arc::new(typed_value_struct)),
+            Some(nulls),
+        ))
    }

    /// Create comprehensive nested shredded variant with diverse null patterns
@@ -3313,10 +3277,11 @@ mod test {
        // Create the inner level: contains typed_value with Int32 values
        // Row 0: has value 42, Row 1: inner null, Row 2: outer null, Row 3: top-level null
        let inner_typed_value =
Int32Array::from(vec![Some(42), None, None, None]); // dummy value for row 2 - let inner = StructArrayBuilder::new() - .with_field("typed_value", Arc::new(inner_typed_value), true) - .build(); - let inner = ShreddedVariantFieldArray::try_new(&inner).unwrap(); + let inner = ShreddedVariantFieldArray::from_parts( + None, + Some(Arc::new(inner_typed_value) as ArrayRef), + None, + ); let outer_typed_value_nulls = NullBuffer::from(vec![ true, // row 0: inner struct exists with typed_value=42 @@ -3329,10 +3294,11 @@ mod test { .with_nulls(outer_typed_value_nulls) .build(); - let outer = StructArrayBuilder::new() - .with_field("typed_value", Arc::new(outer_typed_value), true) - .build(); - let outer = ShreddedVariantFieldArray::try_new(&outer).unwrap(); + let outer = ShreddedVariantFieldArray::from_parts( + None, + Some(Arc::new(outer_typed_value) as ArrayRef), + None, + ); let typed_value_nulls = NullBuffer::from(vec![ true, // row 0: inner struct exists with typed_value=42 @@ -3354,13 +3320,12 @@ mod test { true, // row 2: outer field NULL false, // row 3: top-level NULL ]); - let struct_array = StructArrayBuilder::new() - .with_field("metadata", Arc::new(metadata_array), false) - .with_field("typed_value", Arc::new(typed_value), true) - .with_nulls(nulls) - .build(); - - Arc::new(struct_array) + ArrayRef::from(VariantArray::from_parts( + metadata_array, + None, + Some(Arc::new(typed_value)), + Some(nulls), + )) } /// Create variant with mixed shredding (spec-compliant) including null scenarios @@ -3410,11 +3375,11 @@ mod test { // Create shredded field "x" (globally shredded - never appears in value field) // For top-level null row, the field still needs valid content (not null) let x_field_typed_value = Int32Array::from(vec![Some(1), Some(2), Some(3), Some(0)]); - let x_field_struct = StructArrayBuilder::new() - .with_field("typed_value", Arc::new(x_field_typed_value), true) - .build(); - let x_field_shredded = ShreddedVariantFieldArray::try_new(&x_field_struct) - .expect("should create ShreddedVariantFieldArray for x"); + let x_field_shredded = ShreddedVariantFieldArray::from_parts( + None, + Some(Arc::new(x_field_typed_value) as ArrayRef), + None, + ); // Create main typed_value struct (only contains shredded fields) let typed_value_struct = StructArrayBuilder::new() @@ -3424,14 +3389,12 @@ mod test { // Build VariantArray with both value and typed_value (PartiallyShredded) // Top-level null is encoded in the main StructArray's null mask let variant_nulls = NullBuffer::from(vec![true, true, true, false]); // Row 3 is top-level null - let struct_array = StructArrayBuilder::new() - .with_field("metadata", Arc::new(metadata_array), false) - .with_field("value", Arc::new(value_array), true) - .with_field("typed_value", Arc::new(typed_value_struct), true) - .with_nulls(variant_nulls) - .build(); - - Arc::new(struct_array) + ArrayRef::from(VariantArray::from_parts( + metadata_array, + Some(value_array), + Some(Arc::new(typed_value_struct)), + Some(variant_nulls), + )) } #[test] @@ -4105,11 +4068,9 @@ mod test { EMPTY_VARIANT_METADATA_BYTES, all_nulls_values.len(), )); - let variant_struct = StructArrayBuilder::new() - .with_field("metadata", Arc::new(metadata), false) - .with_field("typed_value", Arc::new(typed_value_struct), true) - .build(); - let variant_array: ArrayRef = VariantArray::try_new(&variant_struct).unwrap().into(); + let variant_array: ArrayRef = + VariantArray::from_parts(metadata, None, Some(Arc::new(typed_value_struct)), None) + .into(); // Case 1: all-null primitive column 
should reuse the typed_value Arc directly
        let all_nulls_field_ref = FieldRef::from(Field::new("result", DataType::Int32, true));
@@ -4309,6 +4270,59 @@ mod test {
        }
    }

+    #[test]
+    fn test_variant_get_list_like_unsafe_cast_preserves_null_elements() {
+        let string_array: ArrayRef = Arc::new(StringArray::from(vec![r#"[1, null, 3]"#]));
+        let variant_array = ArrayRef::from(json_to_variant(&string_array).unwrap());
+        let cast_options = CastOptions {
+            safe: false,
+            ..Default::default()
+        };
+        let options = GetOptions::new()
+            .with_as_type(Some(FieldRef::from(Field::new(
+                "result",
+                DataType::List(Arc::new(Field::new("item", DataType::Int64, true))),
+                true,
+            ))))
+            .with_cast_options(cast_options);
+
+        let result = variant_get(&variant_array, options).unwrap();
+        let element_struct = result
+            .as_any()
+            .downcast_ref::<ListArray>()
+            .unwrap()
+            .values()
+            .as_any()
+            .downcast_ref::<StructArray>()
+            .unwrap();
+
+        let value = element_struct
+            .column_by_name("value")
+            .unwrap()
+            .as_any()
+            .downcast_ref::<BinaryViewArray>()
+            .unwrap();
+        let typed_value = element_struct
+            .column_by_name("typed_value")
+            .unwrap()
+            .as_any()
+            .downcast_ref::<Int64Array>()
+            .unwrap();
+
+        assert_eq!(typed_value.len(), 3);
+        assert_eq!(typed_value.value(0), 1);
+        assert!(typed_value.is_null(1));
+        assert_eq!(typed_value.value(2), 3);
+
+        assert!(value.is_null(0));
+        assert!(value.is_valid(1));
+        assert_eq!(
+            Variant::new(EMPTY_VARIANT_METADATA_BYTES, value.value(1)),
+            Variant::Null
+        );
+        assert!(value.is_null(2));
+    }
+
    #[test]
    fn test_variant_get_list_like_unsafe_cast_errors_on_non_list() {
        let string_array: ArrayRef = Arc::new(StringArray::from(vec!["[1, 2]", "\"not a list\""]));
diff --git a/parquet-variant-compute/src/variant_to_arrow.rs b/parquet-variant-compute/src/variant_to_arrow.rs
index 106e8915beb8..dd396117d22d 100644
--- a/parquet-variant-compute/src/variant_to_arrow.rs
+++ b/parquet-variant-compute/src/variant_to_arrow.rs
@@ -16,10 +16,12 @@
 // under the License.
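+// NOTE: this change extends the row-builder dispatch below with a Struct
+// variant, so unshredded variant objects can now be converted to Arrow
+// structs alongside the existing primitive, list, and binary-variant builders.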
 use crate::shred_variant::{
-    VariantToShreddedVariantRowBuilder, make_variant_to_shredded_variant_arrow_row_builder,
+    NullValue, VariantToShreddedVariantRowBuilder,
+    make_variant_to_shredded_variant_arrow_row_builder,
 };
 use crate::type_conversion::{
-    PrimitiveFromVariant, TimestampFromVariant, variant_to_unscaled_decimal,
+    PrimitiveFromVariant, TimestampFromVariant, variant_cast_with_options,
+    variant_to_unscaled_decimal,
 };
 use crate::variant_array::ShreddedVariantFieldArray;
 use crate::{VariantArray, VariantValueArrayBuilder};
@@ -28,12 +30,13 @@ use arrow::array::{
     BinaryViewBuilder, BooleanBuilder, FixedSizeBinaryBuilder, GenericListArray,
     GenericListViewArray, LargeBinaryBuilder, LargeStringBuilder, NullArray, NullBufferBuilder,
     OffsetSizeTrait, PrimitiveBuilder, StringBuilder, StringLikeArrayBuilder, StringViewBuilder,
+    StructArray,
 };
 use arrow::buffer::{OffsetBuffer, ScalarBuffer};
 use arrow::compute::{CastOptions, DecimalCast};
 use arrow::datatypes::{self, DataType, DecimalType};
 use arrow::error::{ArrowError, Result};
-use arrow_schema::{FieldRef, TimeUnit};
+use arrow_schema::{FieldRef, Fields, TimeUnit};
 use parquet_variant::{Variant, VariantPath};
 use std::sync::Arc;
@@ -44,6 +47,7 @@ use std::sync::Arc;
 pub(crate) enum VariantToArrowRowBuilder<'a> {
     Primitive(PrimitiveVariantToArrowRowBuilder<'a>),
     Array(ArrayVariantToArrowRowBuilder<'a>),
+    Struct(StructVariantToArrowRowBuilder<'a>),
     BinaryVariant(VariantToBinaryVariantArrowRowBuilder),

     // Path extraction wrapper - contains a boxed enum for any of the above
@@ -56,6 +60,7 @@ impl<'a> VariantToArrowRowBuilder<'a> {
         match self {
             Primitive(b) => b.append_null(),
             Array(b) => b.append_null(),
+            Struct(b) => b.append_null(),
             BinaryVariant(b) => b.append_null(),
             WithPath(path_builder) => path_builder.append_null(),
         }
@@ -66,6 +71,7 @@ impl<'a> VariantToArrowRowBuilder<'a> {
         match self {
             Primitive(b) => b.append_value(&value),
             Array(b) => b.append_value(&value),
+            Struct(b) => b.append_value(&value),
             BinaryVariant(b) => b.append_value(value),
             WithPath(path_builder) => path_builder.append_value(value),
         }
@@ -76,12 +82,42 @@ impl<'a> VariantToArrowRowBuilder<'a> {
         match self {
             Primitive(b) => b.finish(),
             Array(b) => b.finish(),
+            Struct(b) => b.finish(),
             BinaryVariant(b) => b.finish(),
             WithPath(path_builder) => path_builder.finish(),
         }
     }
 }

+fn make_typed_variant_to_arrow_row_builder<'a>(
+    data_type: &'a DataType,
+    cast_options: &'a CastOptions,
+    capacity: usize,
+) -> Result<VariantToArrowRowBuilder<'a>> {
+    use VariantToArrowRowBuilder::*;
+
+    match data_type {
+        DataType::Struct(fields) => {
+            let builder = StructVariantToArrowRowBuilder::try_new(fields, cast_options, capacity)?;
+            Ok(Struct(builder))
+        }
+        data_type @ (DataType::List(_)
+        | DataType::LargeList(_)
+        | DataType::ListView(_)
+        | DataType::LargeListView(_)
+        | DataType::FixedSizeList(..)) => {
+            let builder =
+                ArrayVariantToArrowRowBuilder::try_new(data_type, cast_options, capacity)?;
+            Ok(Array(builder))
+        }
+        data_type => {
+            let builder =
+                make_primitive_variant_to_arrow_row_builder(data_type, cast_options, capacity)?;
+            Ok(Primitive(builder))
+        }
+    }
+}
+
 pub(crate) fn make_variant_to_arrow_row_builder<'a>(
     metadata: &BinaryViewArray,
     path: VariantPath<'a>,
@@ -97,26 +133,8 @@ pub(crate) fn make_variant_to_arrow_row_builder<'a>(
             metadata.clone(),
             capacity,
         )),
-        Some(DataType::Struct(_)) => {
-            return Err(ArrowError::NotYetImplemented(
-                "Converting unshredded variant objects to arrow structs".to_string(),
-            ));
-        }
-        Some(
-            data_type @ (DataType::List(_)
-            | DataType::LargeList(_)
-            | DataType::ListView(_)
-            | DataType::LargeListView(_)
-            | DataType::FixedSizeList(..)),
-        ) => {
-            let builder =
-                ArrayVariantToArrowRowBuilder::try_new(data_type, cast_options, capacity)?;
-            Array(builder)
-        }
         Some(data_type) => {
-            let builder =
-                make_primitive_variant_to_arrow_row_builder(data_type, cast_options, capacity)?;
-            Primitive(builder)
+            make_typed_variant_to_arrow_row_builder(data_type, cast_options, capacity)?
         }
     };
@@ -491,6 +509,83 @@ pub(crate) enum ArrayVariantToArrowRowBuilder<'a> {
     LargeListView(VariantToListArrowRowBuilder<'a, i64, true>),
 }

+pub(crate) struct StructVariantToArrowRowBuilder<'a> {
+    fields: &'a Fields,
+    field_builders: Vec<VariantToArrowRowBuilder<'a>>,
+    nulls: NullBufferBuilder,
+    cast_options: &'a CastOptions<'a>,
+}
+
+impl<'a> StructVariantToArrowRowBuilder<'a> {
+    fn try_new(
+        fields: &'a Fields,
+        cast_options: &'a CastOptions<'a>,
+        capacity: usize,
+    ) -> Result<Self> {
+        let mut field_builders = Vec::with_capacity(fields.len());
+        for field in fields.iter() {
+            field_builders.push(make_typed_variant_to_arrow_row_builder(
+                field.data_type(),
+                cast_options,
+                capacity,
+            )?);
+        }
+        Ok(Self {
+            fields,
+            field_builders,
+            nulls: NullBufferBuilder::new(capacity),
+            cast_options,
+        })
+    }
+
+    fn append_null(&mut self) -> Result<()> {
+        for builder in &mut self.field_builders {
+            builder.append_null()?;
+        }
+        self.nulls.append_null();
+        Ok(())
+    }
+
+    fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
+        match variant_cast_with_options(value, self.cast_options, Variant::as_object) {
+            Ok(Some(obj)) => {
+                for (index, field) in self.fields.iter().enumerate() {
+                    match obj.get(field.name()) {
+                        Some(field_value) => {
+                            self.field_builders[index].append_value(field_value)?;
+                        }
+                        None => {
+                            self.field_builders[index].append_null()?;
+                        }
+                    }
+                }
+
+                self.nulls.append_non_null();
+                Ok(true)
+            }
+            Ok(None) => {
+                self.append_null()?;
+                Ok(false)
+            }
+            Err(_) => Err(ArrowError::CastError(format!(
+                "Failed to extract struct from variant {value:?}"
+            ))),
+        }
+    }
+
+    fn finish(mut self) -> Result<ArrayRef> {
+        let mut children = Vec::with_capacity(self.field_builders.len());
+        for builder in self.field_builders {
+            children.push(builder.finish()?);
+        }
+        Ok(Arc::new(StructArray::try_new(
+            self.fields.clone(),
+            children,
+            self.nulls.finish(),
+        )?))
+    }
+}
+
 impl<'a> ArrayVariantToArrowRowBuilder<'a> {
     pub(crate) fn try_new(
         data_type: &'a DataType,
@@ -614,21 +709,24 @@ macro_rules! define_variant_to_primitive_builder {
     }

     fn append_value(&mut self, $value: &Variant<'_, '_>) -> Result<bool> {
-        if let Some(v) = $value_transform {
-            self.builder.append_value(v);
-            Ok(true)
-        } else {
-            if !self.cast_options.safe {
-                // Unsafe casting: return error on conversion failure
-                return Err(ArrowError::CastError(format!(
-                    "Failed to extract primitive of type {} from variant {:?} at path VariantPath([])",
-                    $type_name,
-                    $value
-                )));
+        match variant_cast_with_options(
+            $value,
+            self.cast_options,
+            |$value| $value_transform,
+        ) {
+            Ok(Some(v)) => {
+                self.builder.append_value(v);
+                Ok(true)
+            }
+            Ok(None) => {
+                self.builder.append_null();
+                Ok(false)
             }
-            // Safe casting: append null on conversion failure
-            self.builder.append_null();
-            Ok(false)
+            Err(_) => Err(ArrowError::CastError(format!(
+                "Failed to extract primitive of type {type_name} from variant {value:?} at path VariantPath([])",
+                type_name = $type_name,
+                value = $value
+            ))),
         }
     }
@@ -655,7 +753,7 @@ define_variant_to_primitive_builder!(
 define_variant_to_primitive_builder!(
     struct VariantToBooleanArrowRowBuilder<'a>
     |capacity| -> BooleanBuilder { BooleanBuilder::with_capacity(capacity) },
-    |value| value.as_boolean(),
+    |value| value.as_boolean(),
     type_name: datatypes::BooleanType::DATA_TYPE
 );
@@ -728,20 +826,23 @@ where
     }

     fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
-        if let Some(scaled) = variant_to_unscaled_decimal::<T>(value, self.precision, self.scale) {
-            self.builder.append_value(scaled);
-            Ok(true)
-        } else if self.cast_options.safe {
-            self.builder.append_null();
-            Ok(false)
-        } else {
-            Err(ArrowError::CastError(format!(
-                "Failed to cast to {}(precision={}, scale={}) from variant {:?}",
-                T::PREFIX,
-                self.precision,
-                self.scale,
-                value
-            )))
+        match variant_cast_with_options(value, self.cast_options, |value| {
+            variant_to_unscaled_decimal::<T>(value, self.precision, self.scale)
+        }) {
+            Ok(Some(scaled)) => {
+                self.builder.append_value(scaled);
+                Ok(true)
+            }
+            Ok(None) => {
+                self.builder.append_null();
+                Ok(false)
+            }
+            Err(_) => Err(ArrowError::CastError(format!(
+                "Failed to cast to {prefix}(precision={precision}, scale={scale}) from variant {value:?}",
+                prefix = T::PREFIX,
+                precision = self.precision,
+                scale = self.scale
+            ))),
         }
     }
@@ -770,20 +871,19 @@ impl<'a> VariantToUuidArrowRowBuilder<'a> {
     }

     fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
-        match value.as_uuid() {
-            Some(uuid) => {
+        match variant_cast_with_options(value, self.cast_options, Variant::as_uuid) {
+            Ok(Some(uuid)) => {
                 self.builder
                     .append_value(uuid.as_bytes())
                     .map_err(|e| ArrowError::ExternalError(Box::new(e)))?;
                 Ok(true)
             }
-            None if self.cast_options.safe => {
+            Ok(None) => {
                 self.builder.append_null();
                 Ok(false)
             }
-            None => Err(ArrowError::CastError(format!(
-                "Failed to extract UUID from variant {value:?}",
+            Err(_) => Err(ArrowError::CastError(format!(
+                "Failed to extract UUID from variant {value:?}"
             ))),
         }
     }
@@ -826,7 +926,7 @@ where
             element_data_type,
             cast_options,
             capacity,
-            false,
+            NullValue::ArrayElement,
         )?;
         Ok(Self {
             field,
@@ -845,8 +945,8 @@ where
     }

     fn append_value(&mut self, value: &Variant<'_, '_>) -> Result<bool> {
-        match value {
-            Variant::List(list) => {
+        match variant_cast_with_options(value, self.cast_options, Variant::as_list) {
+            Ok(Some(list)) => {
                 for element in list.iter() {
                     self.element_builder.append_value(element)?;
                     self.current_offset = self.current_offset.add_checked(O::ONE)?;
@@ -855,13 +955,12 @@ where
                 self.nulls.append_non_null();
                 Ok(true)
             }
-            _ if self.cast_options.safe => {
+            Ok(None) => {
                 self.append_null()?;
                 Ok(false)
             }
-            _ => Err(ArrowError::CastError(format!(
-                "Failed to extract list from variant {:?}",
-                value
+            Err(_) => Err(ArrowError::CastError(format!(
+                "Failed to extract list from variant {value:?}"
             ))),
         }
     }
@@ -974,11 +1073,18 @@ define_variant_to_primitive_builder!(
 #[cfg(test)]
 mod tests {
-    use super::make_primitive_variant_to_arrow_row_builder;
+    use super::{
+        make_primitive_variant_to_arrow_row_builder, make_typed_variant_to_arrow_row_builder,
+    };
+    use arrow::array::{
+        Array, Decimal32Array, FixedSizeBinaryArray, Int32Array, ListArray, StructArray,
+    };
     use arrow::compute::CastOptions;
     use arrow::datatypes::{DataType, Field, Fields, UnionFields, UnionMode};
     use arrow::error::ArrowError;
+    use parquet_variant::{Variant, VariantDecimal4};
     use std::sync::Arc;
+    use uuid::Uuid;

     #[test]
     fn make_primitive_builder_rejects_non_primitive_types() {
@@ -1027,4 +1133,97 @@
             }
         }
     }
+
+    #[test]
+    fn strict_cast_allows_variant_null_for_primitive_builder() {
+        let cast_options = CastOptions {
+            safe: false,
+            ..Default::default()
+        };
+        let mut builder =
+            make_primitive_variant_to_arrow_row_builder(&DataType::Int32, &cast_options, 2)
+                .unwrap();
+
+        assert!(!builder.append_value(&Variant::Null).unwrap());
+        assert!(builder.append_value(&Variant::Int32(42)).unwrap());
+
+        let array = builder.finish().unwrap();
+        let int_array = array.as_any().downcast_ref::<Int32Array>().unwrap();
+        assert!(int_array.is_null(0));
+        assert_eq!(int_array.value(1), 42);
+    }
+
+    #[test]
+    fn strict_cast_allows_variant_null_for_decimal_builder() {
+        let cast_options = CastOptions {
+            safe: false,
+            ..Default::default()
+        };
+        let mut builder = make_primitive_variant_to_arrow_row_builder(
+            &DataType::Decimal32(9, 2),
+            &cast_options,
+            2,
+        )
+        .unwrap();
+        let decimal_variant: Variant<'_, '_> = VariantDecimal4::try_new(1234, 2).unwrap().into();
+
+        assert!(!builder.append_value(&Variant::Null).unwrap());
+        assert!(builder.append_value(&decimal_variant).unwrap());
+
+        let array = builder.finish().unwrap();
+        let decimal_array = array.as_any().downcast_ref::<Decimal32Array>().unwrap();
+        assert!(decimal_array.is_null(0));
+        assert_eq!(decimal_array.value(1), 1234);
+    }
+
+    #[test]
+    fn strict_cast_allows_variant_null_for_uuid_builder() {
+        let cast_options = CastOptions {
+            safe: false,
+            ..Default::default()
+        };
+        let mut builder = make_primitive_variant_to_arrow_row_builder(
+            &DataType::FixedSizeBinary(16),
+            &cast_options,
+            2,
+        )
+        .unwrap();
+        let uuid = Uuid::nil();
+
+        assert!(!builder.append_value(&Variant::Null).unwrap());
+        assert!(builder.append_value(&Variant::Uuid(uuid)).unwrap());
+
+        let array = builder.finish().unwrap();
+        let uuid_array = array
+            .as_any()
+            .downcast_ref::<FixedSizeBinaryArray>()
+            .unwrap();
+        assert!(uuid_array.is_null(0));
+        assert_eq!(uuid_array.value(1), uuid.as_bytes());
+    }
+
+    #[test]
+    fn strict_cast_allows_variant_null_for_list_and_struct_builders() {
+        let cast_options = CastOptions {
+            safe: false,
+            ..Default::default()
+        };
+
+        let list_type = DataType::List(Arc::new(Field::new("item", DataType::Int64, true)));
+        let mut list_builder =
+            make_typed_variant_to_arrow_row_builder(&list_type, &cast_options, 1).unwrap();
+        assert!(!list_builder.append_value(Variant::Null).unwrap());
+        let list_array = list_builder.finish().unwrap();
+        let list_array = list_array.as_any().downcast_ref::<ListArray>().unwrap();
+        assert!(list_array.is_null(0));
+
+        let struct_type =
+            DataType::Struct(Fields::from(vec![Field::new("a", DataType::Int32, true)]));
+        let mut struct_builder =
+            make_typed_variant_to_arrow_row_builder(&struct_type, &cast_options, 1).unwrap();
+        assert!(!struct_builder.append_value(Variant::Null).unwrap());
+        let struct_array = struct_builder.finish().unwrap();
+        let struct_array = struct_array.as_any().downcast_ref::<StructArray>().unwrap();
+        assert!(struct_array.is_null(0));
+    }
 }
diff --git a/parquet-variant/src/path.rs b/parquet-variant/src/path.rs
index fe10d0451d54..8e68d9efadf2 100644
--- a/parquet-variant/src/path.rs
+++ b/parquet-variant/src/path.rs
@@ -75,14 +75,15 @@ use std::{borrow::Cow, ops::Deref};
 /// assert_eq!(path[1], VariantPathElement::field("bar"));
 /// ```
 ///
-/// # Example: Accessing filed with bracket
+/// # Example: Accessing field with bracket
 /// ```
 /// # use parquet_variant::{VariantPath, VariantPathElement};
-/// let path = VariantPath::try_from("a[b.c].d[2]").unwrap();
+/// let path = VariantPath::try_from("a['b.c'].d[2]['3']").unwrap();
 /// let expected = VariantPath::from_iter([VariantPathElement::field("a"),
 ///     VariantPathElement::field("b.c"),
 ///     VariantPathElement::field("d"),
-///     VariantPathElement::index(2)]);
+///     VariantPathElement::index(2),
+///     VariantPathElement::field("3")]);
 /// assert_eq!(path, expected)
 /// ```
 #[derive(Debug, Clone, PartialEq, Default)]
 pub struct VariantPath<'a>(Vec<VariantPathElement<'a>>);
@@ -287,11 +288,22 @@ mod tests {
         assert_eq!(path, expected);

         // a quoted token inside brackets is treated as a field name
-        let path = VariantPath::try_from("foo.bar[abc]").unwrap();
+        let path = VariantPath::try_from("foo.bar['abc'][\"def\"]").unwrap();
         let expected = VariantPath::from_iter([
             VariantPathElement::field("foo"),
             VariantPathElement::field("bar"),
             VariantPathElement::field("abc"),
+            VariantPathElement::field("def"),
         ]);
         assert_eq!(path, expected);
+
+        // a number quoted with `'` or `"` is treated as a field, not an index
+        let path = VariantPath::try_from("foo['0'].bar[\"1\"]").unwrap();
+        let expected = VariantPath::from_iter([
+            VariantPathElement::field("foo"),
+            VariantPathElement::field("0"),
+            VariantPathElement::field("bar"),
+            VariantPathElement::field("1"),
+        ]);
         assert_eq!(path, expected);
     }
@@ -321,5 +333,18 @@ mod tests {
         // No '[' before ']'
         let err = VariantPath::try_from("foo.bar]baz").unwrap_err();
         assert_eq!(err.to_string(), "Parser error: Unexpected ']' at byte 7");
+
+        // An unquoted token that is not a valid number fails to parse
+        let err = VariantPath::try_from("foo.bar[123abc]").unwrap_err();
+        assert_eq!(
+            err.to_string(),
+            "Parser error: Invalid token in bracket request: `123abc`. Expected a quoted string or a number(e.g., `['field']` or `[123]`)"
+        );
+
+        let err = VariantPath::try_from("foo.bar[abc]").unwrap_err();
+        assert_eq!(
+            err.to_string(),
+            "Parser error: Invalid token in bracket request: `abc`. Expected a quoted string or a number(e.g., `['field']` or `[123]`)"
+        );
     }
 }
diff --git a/parquet-variant/src/utils.rs b/parquet-variant/src/utils.rs
index 0984a601b213..85d79ed8aea0 100644
--- a/parquet-variant/src/utils.rs
+++ b/parquet-variant/src/utils.rs
@@ -170,9 +170,10 @@ pub(crate) fn fits_precision(n: impl Into) -> bool {
 /// - `"foo"` -> single field `foo`
 /// - `"foo.bar"` -> nested fields `foo`, `bar`
 /// - `"[1]"` -> array index 1
+/// - `"['1']"` or `"[\"1\"]"` -> field `1`
 /// - `"foo[1].bar"` -> field `foo`, index 1, field `bar`
-/// - `"[a.b]"` -> field `a.b` (dot is literal inside bracket)
-/// - `"[a\\]b]"` -> field `a]b` (escaped `]`)
+/// - `"['a.b']"` -> field `a.b` (dot is literal inside bracket)
+/// - `"['a\]b']"` -> field `a]b` (escaped `]`)
 /// - etc.
/// /// # Errors @@ -267,9 +268,23 @@ fn parse_in_bracket(s: &str, i: usize) -> Result<(VariantPathElement<'_>, usize) } }; - let element = match unescaped.parse() { - Ok(idx) => VariantPathElement::index(idx), - Err(_) => VariantPathElement::field(unescaped), + let element = if let Some(inner) = unescaped + .strip_prefix('\'') + .and_then(|s| s.strip_suffix('\'')) + .or_else(|| { + unescaped + .strip_prefix('"') + .and_then(|s| s.strip_suffix('"')) + }) { + // Quoted field name, e.g., ['field'] or ['123'] or ["123"] + VariantPathElement::field(inner.to_string()) + } else { + let Ok(idx) = unescaped.parse() else { + return Err(ArrowError::ParseError(format!( + "Invalid token in bracket request: `{unescaped}`. Expected a quoted string or a number(e.g., `['field']` or `[123]`)" + ))); + }; + VariantPathElement::index(idx) }; Ok((element, end + 1)) diff --git a/parquet/Cargo.toml b/parquet/Cargo.toml index d1ada01c3773..efcd1fe2190b 100644 --- a/parquet/Cargo.toml +++ b/parquet/Cargo.toml @@ -49,7 +49,7 @@ parquet-variant = { workspace = true, optional = true } parquet-variant-json = { workspace = true, optional = true } parquet-variant-compute = { workspace = true, optional = true } -object_store = { version = "0.13.1", default-features = false, optional = true } +object_store = { workspace = true, optional = true, features = ["tokio"] } bytes = { version = "1.1", default-features = false, features = ["std"] } thrift = { version = "0.17", default-features = false } @@ -57,7 +57,7 @@ snap = { version = "1.0", default-features = false, optional = true } brotli = { version = "8.0", default-features = false, features = ["std"], optional = true } # To use `flate2` you must enable either the `flate2-zlib-rs` or `flate2-rust_backened` backends flate2 = { version = "1.1", default-features = false, optional = true } -lz4_flex = { version = "0.12", default-features = false, features = ["std", "frame"], optional = true } +lz4_flex = { version = "0.13", default-features = false, features = ["std", "frame"], optional = true } zstd = { version = "0.13", optional = true, default-features = false } chrono = { workspace = true } num-bigint = { version = "0.4", default-features = false } @@ -83,17 +83,17 @@ base64 = { version = "0.22", default-features = false, features = ["std"] } criterion = { workspace = true, default-features = false, features = ["async_futures"] } snap = { version = "1.0", default-features = false } tempfile = { version = "3.0", default-features = false } -insta = "1.43.1" +insta = { workspace = true, default-features = true } brotli = { version = "8.0", default-features = false, features = ["std"] } flate2 = { version = "1.0", default-features = false, features = ["rust_backend"] } -lz4_flex = { version = "0.12", default-features = false, features = ["std", "frame"] } +lz4_flex = { version = "0.13", default-features = false, features = ["std", "frame"] } zstd = { version = "0.13", default-features = false } serde_json = { version = "1.0", features = ["std"], default-features = false } arrow = { workspace = true, features = ["ipc", "test_utils", "prettyprint", "json"] } arrow-cast = { workspace = true } tokio = { version = "1.0", default-features = false, features = ["macros", "rt-multi-thread", "io-util", "fs"] } rand = { version = "0.9", default-features = false, features = ["std", "std_rng", "thread_rng"] } -object_store = { version = "0.13.1", default-features = false, features = ["azure", "fs"] } +object_store = { workspace = true, features = ["azure", "fs"] } sysinfo = { version = 
"0.38.1", default-features = false, features = ["system"] } [package.metadata.docs.rs] @@ -165,6 +165,11 @@ name = "arrow_reader" required-features = ["arrow"] path = "./tests/arrow_reader/mod.rs" +[[test]] +name = "arrow_writer" +required-features = ["arrow"] +path = "./tests/arrow_writer.rs" + [[test]] name = "encryption" required-features = ["arrow"] diff --git a/parquet/README.md b/parquet/README.md index 8317b4dbd4ff..9e4e91d85d73 100644 --- a/parquet/README.md +++ b/parquet/README.md @@ -77,6 +77,7 @@ Please see the [Implementation Status Page] on the [Apache Parquet] website for information on the status of this implementation. [implementation status page]: https://parquet.apache.org/docs/file-format/implementationstatus/ +[apache parquet]: https://parquet.apache.org/ ## License diff --git a/parquet/benches/arrow_reader.rs b/parquet/benches/arrow_reader.rs index 2ea0706e3517..14fa16b3531e 100644 --- a/parquet/benches/arrow_reader.rs +++ b/parquet/benches/arrow_reader.rs @@ -326,6 +326,58 @@ where InMemoryPageIterator::new(pages) } +fn build_delta_encoded_incr_primitive_page_iterator( + column_desc: ColumnDescPtr, + null_density: f32, + increment: usize, + stepped: bool, +) -> impl PageIterator + Clone +where + T: parquet::data_type::DataType, + T::T: SampleUniform + FromPrimitive, +{ + let max_def_level = column_desc.max_def_level(); + let max_rep_level = column_desc.max_rep_level(); + let rep_levels = vec![0; VALUES_PER_PAGE]; + let mut rng = seedable_rng(); + let mut pages: Vec> = Vec::new(); + let mut running_val: usize = 1; + for _i in 0..NUM_ROW_GROUPS { + let mut column_chunk_pages = Vec::new(); + for _j in 0..PAGES_PER_GROUP { + // generate page + let mut values = Vec::with_capacity(VALUES_PER_PAGE); + let mut def_levels = Vec::with_capacity(VALUES_PER_PAGE); + for k in 0..VALUES_PER_PAGE { + let def_level = if rng.random::() < null_density { + max_def_level - 1 + } else { + max_def_level + }; + if def_level == max_def_level { + let value = FromPrimitive::from_usize(running_val).unwrap(); + running_val = if !stepped || k % 2 == 1 { + running_val + increment + } else { + running_val + }; + values.push(value); + } + def_levels.push(def_level); + } + let mut page_builder = + DataPageBuilderImpl::new(column_desc.clone(), values.len() as u32, true); + page_builder.add_rep_levels(max_rep_level, &rep_levels); + page_builder.add_def_levels(max_def_level, &def_levels); + page_builder.add_values::(Encoding::DELTA_BINARY_PACKED, &values); + column_chunk_pages.push(page_builder.consume()); + } + pages.push(column_chunk_pages); + } + + InMemoryPageIterator::new(pages) +} + fn build_dictionary_encoded_primitive_page_iterator( column_desc: ColumnDescPtr, null_density: f32, @@ -439,6 +491,52 @@ fn build_plain_encoded_byte_array_page_iterator_inner( InMemoryPageIterator::new(pages) } +fn build_constant_prefix_byte_array_page_iterator( + column_desc: ColumnDescPtr, + null_density: f32, + encoding: Encoding, + const_string: bool, +) -> impl PageIterator + Clone { + let max_def_level = column_desc.max_def_level(); + let max_rep_level = column_desc.max_rep_level(); + let rep_levels = vec![0; VALUES_PER_PAGE]; + let mut rng = seedable_rng(); + let mut pages: Vec> = Vec::new(); + for i in 0..NUM_ROW_GROUPS { + let mut column_chunk_pages = Vec::new(); + for j in 0..PAGES_PER_GROUP { + // generate page + let mut values = Vec::with_capacity(VALUES_PER_PAGE); + let mut def_levels = Vec::with_capacity(VALUES_PER_PAGE); + for k in 0..VALUES_PER_PAGE { + let def_level = if rng.random::() < null_density { 
+ max_def_level - 1 + } else { + max_def_level + }; + if def_level == max_def_level { + let string_value = if const_string { + "01234567890123456789012345678901".to_string() + } else { + format!("01234567890123456789012345678901:{:x}{j}{i}", (k % 16)) + }; + values.push(parquet::data_type::ByteArray::from(string_value.as_str())); + } + def_levels.push(def_level); + } + let mut page_builder = + DataPageBuilderImpl::new(column_desc.clone(), values.len() as u32, true); + page_builder.add_rep_levels(max_rep_level, &rep_levels); + page_builder.add_def_levels(max_def_level, &def_levels); + page_builder.add_values::(encoding, &values); + column_chunk_pages.push(page_builder.consume()); + } + pages.push(column_chunk_pages); + } + + InMemoryPageIterator::new(pages) +} + fn build_plain_encoded_byte_array_page_iterator( column_desc: ColumnDescPtr, null_density: f32, @@ -1094,6 +1192,99 @@ fn bench_primitive( assert_eq!(count, EXPECTED_VALUE_COUNT); }); + // binary packed same value + let data = build_delta_encoded_incr_primitive_page_iterator::( + mandatory_column_desc.clone(), + 0.0, + 0, + false, + ); + group.bench_function("binary packed single value", |b| { + b.iter(|| { + let array_reader = + create_primitive_array_reader(data.clone(), mandatory_column_desc.clone()); + count = bench_array_reader(array_reader); + }); + assert_eq!(count, EXPECTED_VALUE_COUNT); + }); + + let data = build_delta_encoded_incr_primitive_page_iterator::( + mandatory_column_desc.clone(), + 0.0, + 0, + false, + ); + group.bench_function("binary packed skip single value", |b| { + b.iter(|| { + let array_reader = + create_primitive_array_reader(data.clone(), mandatory_column_desc.clone()); + count = bench_array_reader_skip(array_reader); + }); + assert_eq!(count, EXPECTED_VALUE_COUNT); + }); + + // binary packed monotonically increasing + let data = build_delta_encoded_incr_primitive_page_iterator::( + mandatory_column_desc.clone(), + 0.0, + 1, + false, + ); + group.bench_function("binary packed increasing value", |b| { + b.iter(|| { + let array_reader = + create_primitive_array_reader(data.clone(), mandatory_column_desc.clone()); + count = bench_array_reader(array_reader); + }); + assert_eq!(count, EXPECTED_VALUE_COUNT); + }); + + let data = build_delta_encoded_incr_primitive_page_iterator::( + mandatory_column_desc.clone(), + 0.0, + 1, + false, + ); + group.bench_function("binary packed skip increasing value", |b| { + b.iter(|| { + let array_reader = + create_primitive_array_reader(data.clone(), mandatory_column_desc.clone()); + count = bench_array_reader_skip(array_reader); + }); + assert_eq!(count, EXPECTED_VALUE_COUNT); + }); + + // binary packed increasing stepped + let data = build_delta_encoded_incr_primitive_page_iterator::( + mandatory_column_desc.clone(), + 0.0, + 1, + true, + ); + group.bench_function("binary packed stepped increasing value", |b| { + b.iter(|| { + let array_reader = + create_primitive_array_reader(data.clone(), mandatory_column_desc.clone()); + count = bench_array_reader(array_reader); + }); + assert_eq!(count, EXPECTED_VALUE_COUNT); + }); + + let data = build_delta_encoded_incr_primitive_page_iterator::( + mandatory_column_desc.clone(), + 0.0, + 1, + true, + ); + group.bench_function("binary packed skip stepped increasing value", |b| { + b.iter(|| { + let array_reader = + create_primitive_array_reader(data.clone(), mandatory_column_desc.clone()); + count = bench_array_reader_skip(array_reader); + }); + assert_eq!(count, EXPECTED_VALUE_COUNT); + }); + // dictionary encoded, no NULLs let data = 
build_dictionary_encoded_primitive_page_iterator::(mandatory_column_desc.clone(), 0.0); @@ -1594,6 +1785,66 @@ fn add_benches(c: &mut Criterion) { assert_eq!(count, EXPECTED_VALUE_COUNT); }); + // delta byte array with constant prefix and suffix lengths + let delta_string_const_prefix_no_null_data = build_constant_prefix_byte_array_page_iterator( + mandatory_string_column_desc.clone(), + 0.0, + Encoding::DELTA_BYTE_ARRAY, + false, + ); + group.bench_function( + "const prefix delta byte array encoded, mandatory, no NULLs", + |b| { + b.iter(|| { + let array_reader = create_byte_array_reader( + delta_string_const_prefix_no_null_data.clone(), + mandatory_string_column_desc.clone(), + ); + count = bench_array_reader(array_reader); + }); + assert_eq!(count, EXPECTED_VALUE_COUNT); + }, + ); + + // delta byte array with constant prefix and no suffix + let delta_string_const_no_null_data = build_constant_prefix_byte_array_page_iterator( + mandatory_string_column_desc.clone(), + 0.0, + Encoding::DELTA_BYTE_ARRAY, + true, + ); + group.bench_function("const delta byte array encoded, mandatory, no NULLs", |b| { + b.iter(|| { + let array_reader = create_byte_array_reader( + delta_string_const_no_null_data.clone(), + mandatory_string_column_desc.clone(), + ); + count = bench_array_reader(array_reader); + }); + assert_eq!(count, EXPECTED_VALUE_COUNT); + }); + + // delta length byte array with constant lengths + let delta_string_const_no_null_data = build_constant_prefix_byte_array_page_iterator( + mandatory_string_column_desc.clone(), + 0.0, + Encoding::DELTA_LENGTH_BYTE_ARRAY, + true, + ); + group.bench_function( + "const delta length byte array encoded, mandatory, no NULLs", + |b| { + b.iter(|| { + let array_reader = create_byte_array_reader( + delta_string_const_no_null_data.clone(), + mandatory_string_column_desc.clone(), + ); + count = bench_array_reader(array_reader); + }); + assert_eq!(count, EXPECTED_VALUE_COUNT); + }, + ); + group.finish(); // binary benchmarks diff --git a/parquet/benches/arrow_reader_clickbench.rs b/parquet/benches/arrow_reader_clickbench.rs index 5a6fb36d5800..039829f1b975 100644 --- a/parquet/benches/arrow_reader_clickbench.rs +++ b/parquet/benches/arrow_reader_clickbench.rs @@ -598,27 +598,38 @@ impl Display for Query { /// FULL path to the ClickBench hits_1.parquet file static HITS_1_PATH: OnceLock = OnceLock::new(); -/// Finds the paths to the ClickBench file, or panics with a useful message -/// explaining how to download if it is not found +/// Finds the paths to the ClickBench file, downloading it if not found fn hits_1() -> &'static Path { HITS_1_PATH.get_or_init(|| { + let current_dir = std::env::current_dir().expect("Failed to get current directory"); + println!( + "Looking for ClickBench files starting in current_dir and all parent directories: {current_dir:?}" + ); - let current_dir = std::env::current_dir().expect("Failed to get current directory"); - println!( - "Looking for ClickBench files starting in current_dir and all parent directories: {current_dir:?}" - - ); + if let Some(hits_1_path) = find_file_if_exists(current_dir.clone(), "hits_1.parquet") { + return hits_1_path; + } - let Some(hits_1_path) = find_file_if_exists(current_dir.clone(), "hits_1.parquet") else { - eprintln!( - "Could not find hits_1.parquet in directory or parents: {current_dir:?}. 
Download it via", + // File not found, download it + let download_path = current_dir.join("hits_1.parquet"); + let url = "https://datasets.clickhouse.com/hits_compatible/athena_partitioned/hits_1.parquet"; + println!("hits_1.parquet not found, downloading from {url}..."); + + let status = std::process::Command::new("wget") + .args(["--continue", "-O"]) + .arg(&download_path) + .arg(url) + .status() + .expect("Failed to execute wget. Please install wget or download manually."); + + assert!( + status.success(), + "Failed to download hits_1.parquet. You can download it manually via:\n\ + wget --continue {url}" ); - eprintln!(); - eprintln!("wget --continue https://datasets.clickhouse.com/hits_compatible/athena_partitioned/hits_1.parquet"); - panic!("Stopping"); - }; - hits_1_path + println!("Downloaded hits_1.parquet to {download_path:?}"); + download_path }) } diff --git a/parquet/benches/arrow_writer.rs b/parquet/benches/arrow_writer.rs index b92f0788b2fc..2381941897c7 100644 --- a/parquet/benches/arrow_writer.rs +++ b/parquet/benches/arrow_writer.rs @@ -19,7 +19,7 @@ extern crate criterion; use criterion::{Bencher, Criterion, Throughput}; -use parquet::arrow::arrow_writer::{ArrowRowGroupWriterFactory, compute_leaves}; +use parquet::arrow::ArrowWriter; use parquet::basic::{Compression, ZstdLevel}; extern crate arrow; @@ -33,10 +33,8 @@ use arrow::datatypes::*; use arrow::util::bench_util::{create_f16_array, create_f32_array, create_f64_array}; use arrow::{record_batch::RecordBatch, util::data_gen::*}; use arrow_array::RecordBatchOptions; -use parquet::arrow::ArrowSchemaConverter; use parquet::errors::Result; -use parquet::file::properties::{WriterProperties, WriterVersion}; -use parquet::file::writer::SerializedFileWriter; +use parquet::file::properties::{CdcOptions, WriterProperties, WriterVersion}; fn create_primitive_bench_batch( size: usize, @@ -342,39 +340,21 @@ fn write_batch_with_option( batch: &RecordBatch, props: Option, ) -> Result<()> { - let mut file = Empty::default(); - let props = Arc::new(props.unwrap_or_default()); - let parquet_schema = ArrowSchemaConverter::new() - .with_coerce_types(props.coerce_types()) - .convert(batch.schema_ref())?; - let writer = SerializedFileWriter::new(&mut file, parquet_schema.root_schema_ptr(), props)?; - let row_group_writer_factory = ArrowRowGroupWriterFactory::new(&writer, batch.schema()); + let props = props.unwrap_or_default(); bench.iter(|| { - let mut row_group = row_group_writer_factory.create_column_writers(0).unwrap(); - - let mut writers = row_group.iter_mut(); - for (field, column) in batch - .schema() - .fields() - .iter() - .zip(black_box(batch).columns()) - { - for leaf in compute_leaves(field.as_ref(), column).unwrap() { - writers.next().unwrap().write(&leaf).unwrap() - } - } - - for writer in row_group.into_iter() { - black_box(writer.close()).unwrap(); - } + let mut file = Empty::default(); + let mut writer = + ArrowWriter::try_new(&mut file, batch.schema(), Some(props.clone())).unwrap(); + writer.write(black_box(batch)).unwrap(); + black_box(writer.close()).unwrap(); }); Ok(()) } fn create_batches() -> Vec<(&'static str, RecordBatch)> { - const BATCH_SIZE: usize = 4096; + const BATCH_SIZE: usize = 1024 * 1024; let mut batches = vec![]; @@ -440,6 +420,11 @@ fn create_writer_props() -> Vec<(&'static str, WriterProperties)> { .build(); props.push(("zstd_parquet_2", prop)); + let prop = WriterProperties::builder() + .set_content_defined_chunking(Some(CdcOptions::default())) + .build(); + props.push(("cdc", prop)); + props } diff 
--git a/parquet/pytest/requirements.txt b/parquet/pytest/requirements.txt index 40797e5e7a20..696fc2f0d63c 100644 --- a/parquet/pytest/requirements.txt +++ b/parquet/pytest/requirements.txt @@ -24,29 +24,34 @@ attrs==22.1.0 \ --hash=sha256:29adc2665447e5191d0e7c568fde78b21f9672d344281d0c6e1ab085429b22b6 \ --hash=sha256:86efa402f67bf2df34f51a335487cf46b1ec130d02b8d39fd248abfd30da551c # via pytest -black==24.3.0 \ - --hash=sha256:2818cf72dfd5d289e48f37ccfa08b460bf469e67fb7c4abb07edc2e9f16fb63f \ - --hash=sha256:41622020d7120e01d377f74249e677039d20e6344ff5851de8a10f11f513bf93 \ - --hash=sha256:4acf672def7eb1725f41f38bf6bf425c8237248bb0804faa3965c036f7672d11 \ - --hash=sha256:4be5bb28e090456adfc1255e03967fb67ca846a03be7aadf6249096100ee32d0 \ - --hash=sha256:4f1373a7808a8f135b774039f61d59e4be7eb56b2513d3d2f02a8b9365b8a8a9 \ - --hash=sha256:56f52cfbd3dabe2798d76dbdd299faa046a901041faf2cf33288bc4e6dae57b5 \ - --hash=sha256:65b76c275e4c1c5ce6e9870911384bff5ca31ab63d19c76811cb1fb162678213 \ - --hash=sha256:65c02e4ea2ae09d16314d30912a58ada9a5c4fdfedf9512d23326128ac08ac3d \ - --hash=sha256:6905238a754ceb7788a73f02b45637d820b2f5478b20fec82ea865e4f5d4d9f7 \ - --hash=sha256:79dcf34b33e38ed1b17434693763301d7ccbd1c5860674a8f871bd15139e7837 \ - --hash=sha256:7bb041dca0d784697af4646d3b62ba4a6b028276ae878e53f6b4f74ddd6db99f \ - --hash=sha256:7d5e026f8da0322b5662fa7a8e752b3fa2dac1c1cbc213c3d7ff9bdd0ab12395 \ - --hash=sha256:9f50ea1132e2189d8dff0115ab75b65590a3e97de1e143795adb4ce317934995 \ - --hash=sha256:a0c9c4a0771afc6919578cec71ce82a3e31e054904e7197deacbc9382671c41f \ - --hash=sha256:aadf7a02d947936ee418777e0247ea114f78aff0d0959461057cae8a04f20597 \ - --hash=sha256:b5991d523eee14756f3c8d5df5231550ae8993e2286b8014e2fdea7156ed0959 \ - --hash=sha256:bf21b7b230718a5f08bd32d5e4f1db7fc8788345c8aea1d155fc17852b3410f5 \ - --hash=sha256:c45f8dff244b3c431b36e3224b6be4a127c6aca780853574c00faf99258041eb \ - --hash=sha256:c7ed6668cbbfcd231fa0dc1b137d3e40c04c7f786e626b405c62bcd5db5857e4 \ - --hash=sha256:d7de8d330763c66663661a1ffd432274a2f92f07feeddd89ffd085b5744f85e7 \ - --hash=sha256:e19cb1c6365fd6dc38a6eae2dcb691d7d83935c10215aef8e6c38edee3f77abd \ - --hash=sha256:e2af80566f43c85f5797365077fb64a393861a3730bd110971ab7a0c94e873e7 +black==26.3.1 \ + --hash=sha256:0126ae5b7c09957da2bdbd91a9ba1207453feada9e9fe51992848658c6c8e01c \ + --hash=sha256:0f76ff19ec5297dd8e66eb64deda23631e642c9393ab592826fd4bdc97a4bce7 \ + --hash=sha256:28ef38aee69e4b12fda8dba75e21f9b4f979b490c8ac0baa7cb505369ac9e1ff \ + --hash=sha256:2bd5aa94fc267d38bb21a70d7410a89f1a1d318841855f698746f8e7f51acd1b \ + --hash=sha256:2c50f5063a9641c7eed7795014ba37b0f5fa227f3d408b968936e24bc0566b07 \ + --hash=sha256:2d6bfaf7fd0993b420bed691f20f9492d53ce9a2bcccea4b797d34e947318a78 \ + --hash=sha256:41cd2012d35b47d589cb8a16faf8a32ef7a336f56356babd9fcf70939ad1897f \ + --hash=sha256:474c27574d6d7037c1bc875a81d9be0a9a4f9ee95e62800dab3cfaadbf75acd5 \ + --hash=sha256:5602bdb96d52d2d0672f24f6ffe5218795736dd34807fd0fd55ccd6bf206168b \ + --hash=sha256:5e9d0d86df21f2e1677cc4bd090cd0e446278bcbbe49bf3659c308c3e402843e \ + --hash=sha256:5ed0ca58586c8d9a487352a96b15272b7fa55d139fc8496b519e78023a8dab0a \ + --hash=sha256:6c54a4a82e291a1fee5137371ab488866b7c86a3305af4026bdd4dc78642e1ac \ + --hash=sha256:6e131579c243c98f35bce64a7e08e87fb2d610544754675d4a0e73a070a5aa3a \ + --hash=sha256:855822d90f884905362f602880ed8b5df1b7e3ee7d0db2502d4388a954cc8c54 \ + --hash=sha256:86a8b5035fce64f5dcd1b794cf8ec4d31fe458cf6ce3986a30deb434df82a1d2 \ + 
--hash=sha256:8a33d657f3276328ce00e4d37fe70361e1ec7614da5d7b6e78de5426cb56332f \ + --hash=sha256:92c0ec1f2cc149551a2b7b47efc32c866406b6891b0ee4625e95967c8f4acfb1 \ + --hash=sha256:9a5e9f45e5d5e1c5b5c29b3bd4265dcc90e8b92cf4534520896ed77f791f4da5 \ + --hash=sha256:afc622538b430aa4c8c853f7f63bc582b3b8030fd8c80b70fb5fa5b834e575c2 \ + --hash=sha256:b07fc0dab849d24a80a29cfab8d8a19187d1c4685d8a5e6385a5ce323c1f015f \ + --hash=sha256:b5e6f89631eb88a7302d416594a32faeee9fb8fb848290da9d0a5f2903519fc1 \ + --hash=sha256:bf9bf162ed91a26f1adba8efda0b573bc6924ec1408a52cc6f82cb73ec2b142c \ + --hash=sha256:c7e72339f841b5a237ff14f7d3880ddd0fc7f98a1199e8c4327f9a4f478c1839 \ + --hash=sha256:ddb113db38838eb9f043623ba274cfaf7d51d5b0c22ecb30afe58b1bb8322983 \ + --hash=sha256:dfdd51fc3e64ea4f35873d1b3fb25326773d55d2329ff8449139ebaad7357efb \ + --hash=sha256:f1cd08e99d2f9317292a311dfe578fd2a24b15dbce97792f9c4d752275c1fa56 \ + --hash=sha256:f89f2ab047c76a9c03f78d0d66ca519e389519902fa27e7a91117ef7611c0568 # via -r requirements.in click==8.1.3 \ --hash=sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e \ @@ -129,9 +134,9 @@ pandas==1.5.2 \ --hash=sha256:e9dbacd22555c2d47f262ef96bb4e30880e5956169741400af8b306bbb24a273 \ --hash=sha256:f6257b314fc14958f8122779e5a1557517b0f8e500cfb2bd53fa1f75a8ad0af2 # via -r requirements.in -pathspec==0.10.2 \ - --hash=sha256:88c2606f2c1e818b978540f73ecc908e13999c6c3a383daf3705652ae79807a5 \ - --hash=sha256:8f6bf73e5758fd365ef5d58ce09ac7c27d2833a8d7da51712eac6e27e35141b0 +pathspec==1.0.4 \ + --hash=sha256:0210e2ae8a21a9137c0d470578cb0e595af87edaa6ebf12ff176f14a02e0e645 \ + --hash=sha256:fb6ae2fd4e7c921a165808a552060e722767cfa526f99ca5156ed2ce45a5c723 # via black platformdirs==2.5.4 \ --hash=sha256:1006647646d80f16130f052404c6b901e80ee4ed6bef6792e1f238a8969106f7 \ @@ -156,6 +161,50 @@ python-dateutil==2.8.2 \ --hash=sha256:0123cacc1627ae19ddf3c27a5de5bd67ee4586fbdd6440d9748f8abb483d3e86 \ --hash=sha256:961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9 # via pandas +pytokens==0.4.1 \ + --hash=sha256:0fc71786e629cef478cbf29d7ea1923299181d0699dbe7c3c0f4a583811d9fc1 \ + --hash=sha256:11edda0942da80ff58c4408407616a310adecae1ddd22eef8c692fe266fa5009 \ + --hash=sha256:140709331e846b728475786df8aeb27d24f48cbcf7bcd449f8de75cae7a45083 \ + --hash=sha256:24afde1f53d95348b5a0eb19488661147285ca4dd7ed752bbc3e1c6242a304d1 \ + --hash=sha256:26cef14744a8385f35d0e095dc8b3a7583f6c953c2e3d269c7f82484bf5ad2de \ + --hash=sha256:27b83ad28825978742beef057bfe406ad6ed524b2d28c252c5de7b4a6dd48fa2 \ + --hash=sha256:292052fe80923aae2260c073f822ceba21f3872ced9a68bb7953b348e561179a \ + --hash=sha256:29d1d8fb1030af4d231789959f21821ab6325e463f0503a61d204343c9b355d1 \ + --hash=sha256:2a44ed93ea23415c54f3face3b65ef2b844d96aeb3455b8a69b3df6beab6acc5 \ + --hash=sha256:30f51edd9bb7f85c748979384165601d028b84f7bd13fe14d3e065304093916a \ + --hash=sha256:34bcc734bd2f2d5fe3b34e7b3c0116bfb2397f2d9666139988e7a3eb5f7400e3 \ + --hash=sha256:3ad72b851e781478366288743198101e5eb34a414f1d5627cdd585ca3b25f1db \ + --hash=sha256:3f901fe783e06e48e8cbdc82d631fca8f118333798193e026a50ce1b3757ea68 \ + --hash=sha256:42f144f3aafa5d92bad964d471a581651e28b24434d184871bd02e3a0d956037 \ + --hash=sha256:4a14d5f5fc78ce85e426aa159489e2d5961acf0e47575e08f35584009178e321 \ + --hash=sha256:4a58d057208cb9075c144950d789511220b07636dd2e4708d5645d24de666bdc \ + --hash=sha256:4e691d7f5186bd2842c14813f79f8884bb03f5995f0575272009982c5ac6c0f7 \ + --hash=sha256:5502408cab1cb18e128570f8d598981c68a50d0cbd7c61312a90507cd3a1276f \ + 
--hash=sha256:584c80c24b078eec1e227079d56dc22ff755e0ba8654d8383b2c549107528918 \ + --hash=sha256:5ad948d085ed6c16413eb5fec6b3e02fa00dc29a2534f088d3302c47eb59adf9 \ + --hash=sha256:670d286910b531c7b7e3c0b453fd8156f250adb140146d234a82219459b9640c \ + --hash=sha256:682fa37ff4d8e95f7df6fe6fe6a431e8ed8e788023c6bcc0f0880a12eab80ad1 \ + --hash=sha256:6d6c4268598f762bc8e91f5dbf2ab2f61f7b95bdc07953b602db879b3c8c18e1 \ + --hash=sha256:79fc6b8699564e1f9b521582c35435f1bd32dd06822322ec44afdeba666d8cb3 \ + --hash=sha256:8bdb9d0ce90cbf99c525e75a2fa415144fd570a1ba987380190e8b786bc6ef9b \ + --hash=sha256:8fcb9ba3709ff77e77f1c7022ff11d13553f3c30299a9fe246a166903e9091eb \ + --hash=sha256:941d4343bf27b605e9213b26bfa1c4bf197c9c599a9627eb7305b0defcfe40c1 \ + --hash=sha256:967cf6e3fd4adf7de8fc73cd3043754ae79c36475c1c11d514fc72cf5490094a \ + --hash=sha256:970b08dd6b86058b6dc07efe9e98414f5102974716232d10f32ff39701e841c4 \ + --hash=sha256:97f50fd18543be72da51dd505e2ed20d2228c74e0464e4262e4899797803d7fa \ + --hash=sha256:9bd7d7f544d362576be74f9d5901a22f317efc20046efe2034dced238cbbfe78 \ + --hash=sha256:add8bf86b71a5d9fb5b89f023a80b791e04fba57960aa790cc6125f7f1d39dfe \ + --hash=sha256:b35d7e5ad269804f6697727702da3c517bb8a5228afa450ab0fa787732055fc9 \ + --hash=sha256:b49750419d300e2b5a3813cf229d4e5a4c728dae470bcc89867a9ad6f25a722d \ + --hash=sha256:d31b97b3de0f61571a124a00ffe9a81fb9939146c122c11060725bd5aea79975 \ + --hash=sha256:d70e77c55ae8380c91c0c18dea05951482e263982911fc7410b1ffd1dadd3440 \ + --hash=sha256:d9907d61f15bf7261d7e775bd5d7ee4d2930e04424bab1972591918497623a16 \ + --hash=sha256:da5baeaf7116dced9c6bb76dc31ba04a2dc3695f3d9f74741d7910122b456edc \ + --hash=sha256:dc74c035f9bfca0255c1af77ddd2d6ae8419012805453e4b0e7513e17904545d \ + --hash=sha256:dcafc12c30dbaf1e2af0490978352e0c4041a7cde31f4f81435c2a5e8b9cabb6 \ + --hash=sha256:ee44d0f85b803321710f9239f335aafe16553b39106384cef8e6de40cb4ef2f6 \ + --hash=sha256:f66a6bbe741bd431f6d741e617e0f39ec7257ca1f89089593479347cc4d13324 + # via black pytz==2022.6 \ --hash=sha256:222439474e9c98fced559f1709d89e6c9cbf8d79c794ff3eb9f8800064291427 \ --hash=sha256:e89512406b793ca39f5971bc999cc538ce125c0e51c27941bef4568b460095e2 diff --git a/parquet/src/arrow/array_reader/byte_array.rs b/parquet/src/arrow/array_reader/byte_array.rs index 0acbe6501924..2d0d44fbe203 100644 --- a/parquet/src/arrow/array_reader/byte_array.rs +++ b/parquet/src/arrow/array_reader/byte_array.rs @@ -580,6 +580,9 @@ impl ByteArrayDecoderDictionary { return Ok(0); } + // Pre-reserve offsets capacity to avoid per-chunk reallocation + output.offsets.reserve(len); + self.decoder.read(len, |keys| { output.extend_from_dictionary(keys, dict.offsets.as_slice(), dict.values.as_slice()) }) diff --git a/parquet/src/arrow/array_reader/byte_view_array.rs b/parquet/src/arrow/array_reader/byte_view_array.rs index 65b627aae451..1933654118f3 100644 --- a/parquet/src/arrow/array_reader/byte_view_array.rs +++ b/parquet/src/arrow/array_reader/byte_view_array.rs @@ -500,6 +500,9 @@ impl ByteViewArrayDecoderDictionary { // then the base_buffer_idx is 5 - 2 = 3 let base_buffer_idx = output.buffers.len() as u32 - dict.buffers.len() as u32; + // Pre-reserve output capacity to avoid per-chunk reallocation in extend + output.views.reserve(len); + let mut error = None; let read = self.decoder.read(len, |keys| { if base_buffer_idx == 0 { diff --git a/parquet/src/arrow/array_reader/primitive_array.rs b/parquet/src/arrow/array_reader/primitive_array.rs index dae42c4c7124..e1c944f60c42 100644 --- 
a/parquet/src/arrow/array_reader/primitive_array.rs +++ b/parquet/src/arrow/array_reader/primitive_array.rs @@ -163,7 +163,7 @@ where let nulls = self .record_reader .consume_bitmap_buffer() - .map(|b| NullBuffer::new(BooleanBuffer::new(b, 0, len))); + .and_then(|b| NullBuffer::from_unsliced_buffer(b, len)); let array: ArrayRef = match T::get_physical_type() { PhysicalType::BOOLEAN => Arc::new(BooleanArray::new( diff --git a/parquet/src/arrow/arrow_reader/mod.rs b/parquet/src/arrow/arrow_reader/mod.rs index 670f9d80c5a3..1b02c4ae25d3 100644 --- a/parquet/src/arrow/arrow_reader/mod.rs +++ b/parquet/src/arrow/arrow_reader/mod.rs @@ -3510,7 +3510,7 @@ pub(crate) mod tests { }) .collect() } - None => values.iter().flatten().map(|b| Some(b.clone())).collect(), + None => values.iter().flatten().cloned().map(Some).collect(), }; data } diff --git a/parquet/src/arrow/arrow_reader/read_plan.rs b/parquet/src/arrow/arrow_reader/read_plan.rs index 7c9eb36befe3..99ffe0febc95 100644 --- a/parquet/src/arrow/arrow_reader/read_plan.rs +++ b/parquet/src/arrow/arrow_reader/read_plan.rs @@ -167,6 +167,13 @@ impl ReadPlanBuilder { }; } + // If the predicate selected all rows and there is no prior selection, + // skip creating a RowSelection entirely — this avoids the allocation + // and keeps selection as None which enables coalesced page fetches. + let all_selected = filters.iter().all(|f| f.true_count() == f.len()); + if all_selected && self.selection.is_none() { + return Ok(self); + } let raw = RowSelection::from_filters(&filters); self.selection = match self.selection.take() { Some(selection) => Some(selection.and_then(&raw)), diff --git a/parquet/src/arrow/arrow_writer/levels.rs b/parquet/src/arrow/arrow_writer/levels.rs index 0ff2137d907e..2ebe1319160f 100644 --- a/parquet/src/arrow/arrow_writer/levels.rs +++ b/parquet/src/arrow/arrow_writer/levels.rs @@ -40,6 +40,7 @@ //! //! \[1\] [parquet-format#nested-encoding](https://github.com/apache/parquet-format#nested-encoding) +use crate::column::chunker::CdcChunk; use crate::errors::{ParquetError, Result}; use arrow_array::cast::AsArray; use arrow_array::{Array, ArrayRef, OffsetSizeTrait}; @@ -801,11 +802,47 @@ impl ArrayLevels { pub fn non_null_indices(&self) -> &[usize] { &self.non_null_indices } + + /// Create a sliced view of this `ArrayLevels` for a CDC chunk. + /// + /// The chunk's `value_offset`/`num_values` select the relevant slice of + /// `non_null_indices`. The array is sliced to the range covered by + /// those indices, and they are shifted to be relative to the slice. + pub(crate) fn slice_for_chunk(&self, chunk: &CdcChunk) -> Self { + let def_levels = self.def_levels.as_ref().map(|levels| { + levels[chunk.level_offset..chunk.level_offset + chunk.num_levels].to_vec() + }); + let rep_levels = self.rep_levels.as_ref().map(|levels| { + levels[chunk.level_offset..chunk.level_offset + chunk.num_levels].to_vec() + }); + + // Select the non-null indices for this chunk. + let nni = &self.non_null_indices[chunk.value_offset..chunk.value_offset + chunk.num_values]; + // Compute the array range spanned by the non-null indices + let start = nni.first().copied().unwrap_or(0); + let end = nni.last().map_or(0, |&i| i + 1); + // Shift indices to be relative to the sliced array. + let non_null_indices = nni.iter().map(|&idx| idx - start).collect(); + // Slice the array to the computed range. 
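+        // (Editor's worked example, derived from the code above) if this
+        // chunk's non_null_indices slice is [3, 8, 9], then start = 3,
+        // end = 10, the slice covers leaf rows 3..10, and the shifted
+        // indices become [0, 5, 6].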
+ let array = self.array.slice(start, end - start); + let logical_nulls = array.logical_nulls(); + + Self { + def_levels, + rep_levels, + non_null_indices, + max_def_level: self.max_def_level, + max_rep_level: self.max_rep_level, + array, + logical_nulls, + } + } } #[cfg(test)] mod tests { use super::*; + use crate::column::chunker::CdcChunk; use arrow_array::builder::*; use arrow_array::types::Int32Type; @@ -2096,4 +2133,141 @@ mod tests { let v = Arc::new(array) as ArrayRef; LevelInfoBuilder::try_new(field, Default::default(), &v).unwrap() } + + #[test] + fn test_slice_for_chunk_flat() { + // Case 1: required field (max_def_level=0, no def/rep levels stored). + // Array has 6 values; all are non-null so non_null_indices covers every position. + // value_offset=2, num_values=3 → non_null_indices[2..5] = [2,3,4]. + // Array is sliced (no def_levels → write_batch_internal uses values.len()). + let array: ArrayRef = Arc::new(Int32Array::from(vec![1, 2, 3, 4, 5, 6])); + let logical_nulls = array.logical_nulls(); + let levels = ArrayLevels { + def_levels: None, + rep_levels: None, + non_null_indices: vec![0, 1, 2, 3, 4, 5], + max_def_level: 0, + max_rep_level: 0, + array, + logical_nulls, + }; + let sliced = levels.slice_for_chunk(&CdcChunk { + level_offset: 0, + num_levels: 0, + value_offset: 2, + num_values: 3, + }); + assert!(sliced.def_levels.is_none()); + assert!(sliced.rep_levels.is_none()); + assert_eq!(sliced.non_null_indices, vec![0, 1, 2]); + assert_eq!(sliced.array.len(), 3); + + // Case 2: optional field (max_def_level=1, def levels present, no rep levels). + // Array: [Some(1), None, Some(3), None, Some(5), Some(6)] + // non_null_indices: [0, 2, 4, 5] + // value_offset=1, num_values=1 → non_null_indices[1..2] = [2]. + // Array is not sliced (def_levels present → num_levels from def_levels.len()). + let array: ArrayRef = Arc::new(Int32Array::from(vec![ + Some(1), + None, + Some(3), + None, + Some(5), + Some(6), + ])); + let logical_nulls = array.logical_nulls(); + let levels = ArrayLevels { + def_levels: Some(vec![1, 0, 1, 0, 1, 1]), + rep_levels: None, + non_null_indices: vec![0, 2, 4, 5], + max_def_level: 1, + max_rep_level: 0, + array, + logical_nulls, + }; + let sliced = levels.slice_for_chunk(&CdcChunk { + level_offset: 1, + num_levels: 3, + value_offset: 1, + num_values: 1, + }); + assert_eq!(sliced.def_levels, Some(vec![0, 1, 0])); + assert!(sliced.rep_levels.is_none()); + assert_eq!(sliced.non_null_indices, vec![0]); // [2] shifted by -2 (nni[0]) + assert_eq!(sliced.array.len(), 1); + } + + #[test] + fn test_slice_for_chunk_nested_with_nulls() { + // Regression test for https://github.com/apache/arrow-rs/issues/9637 + // + // Simulates a List where null list entries have non-zero child + // ranges (valid per Arrow spec: "a null value may correspond to a + // non-empty segment in the child array"). This creates gaps in the + // leaf array that don't correspond to any levels. 
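+        // (Editor's note) slicing by the min/max of the chunk's
+        // non_null_indices means gap slots at the edges of a chunk fall
+        // outside the slice entirely, while gaps between two non-null
+        // values of the same chunk remain inside it.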
+ // + // 5 rows with 2 null list entries owning non-empty child ranges: + // row 0: [1] → leaf[0] + // row 1: null list → owns leaf[1..3] (gap of 2) + // row 2: [2, null] → leaf[3], leaf[4]=null element + // row 3: null list → owns leaf[5..8] (gap of 3) + // row 4: [4, 5] → leaf[8], leaf[9] + // + // def_levels: [3, 0, 3, 2, 0, 3, 3] + // rep_levels: [0, 0, 0, 1, 0, 0, 1] + // non_null_indices: [0, 3, 8, 9] + // gaps in array: 0→3 (skip 1,2), 3→8 (skip 5,6,7) + let array: ArrayRef = Arc::new(Int32Array::from(vec![ + Some(1), // 0: row 0 + None, // 1: gap (null list row 1) + None, // 2: gap (null list row 1) + Some(2), // 3: row 2 + None, // 4: row 2, null element + None, // 5: gap (null list row 3) + None, // 6: gap (null list row 3) + None, // 7: gap (null list row 3) + Some(4), // 8: row 4 + Some(5), // 9: row 4 + ])); + let logical_nulls = array.logical_nulls(); + let levels = ArrayLevels { + def_levels: Some(vec![3, 0, 3, 2, 0, 3, 3]), + rep_levels: Some(vec![0, 0, 0, 1, 0, 0, 1]), + non_null_indices: vec![0, 3, 8, 9], + max_def_level: 3, + max_rep_level: 1, + array, + logical_nulls, + }; + + // Chunk 0: rows 0-1, nni=[0] → array sliced to [0..1] + let chunk0 = levels.slice_for_chunk(&CdcChunk { + level_offset: 0, + num_levels: 2, + value_offset: 0, + num_values: 1, + }); + assert_eq!(chunk0.non_null_indices, vec![0]); + assert_eq!(chunk0.array.len(), 1); + + // Chunk 1: rows 2-3, nni=[3] → array sliced to [3..4] + let chunk1 = levels.slice_for_chunk(&CdcChunk { + level_offset: 2, + num_levels: 3, + value_offset: 1, + num_values: 1, + }); + assert_eq!(chunk1.non_null_indices, vec![0]); + assert_eq!(chunk1.array.len(), 1); + + // Chunk 2: row 4, nni=[8, 9] → array sliced to [8..10] + let chunk2 = levels.slice_for_chunk(&CdcChunk { + level_offset: 5, + num_levels: 2, + value_offset: 2, + num_values: 2, + }); + assert_eq!(chunk2.non_null_indices, vec![0, 1]); + assert_eq!(chunk2.array.len(), 2); + } } diff --git a/parquet/src/arrow/arrow_writer/mod.rs b/parquet/src/arrow/arrow_writer/mod.rs index 979988eebc05..2ef71d5745a2 100644 --- a/parquet/src/arrow/arrow_writer/mod.rs +++ b/parquet/src/arrow/arrow_writer/mod.rs @@ -17,6 +17,8 @@ //! Contains writer which writes arrow data into parquet data. +use crate::column::chunker::ContentDefinedChunker; + use bytes::Bytes; use std::io::{Read, Write}; use std::iter::Peekable; @@ -192,6 +194,9 @@ pub struct ArrowWriter { /// The maximum size in bytes for a row group, or None for unlimited max_row_group_bytes: Option, + + /// CDC chunkers persisted across row groups (one per leaf column). 
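+    /// `None` unless content-defined chunking is enabled in the
+    /// `WriterProperties` (see `content_defined_chunking`).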
+    cdc_chunkers: Option<Vec<ContentDefinedChunker>>,
 }
 
 impl<W: Write> std::fmt::Debug for ArrowWriter<W> {
@@ -261,6 +266,18 @@ impl<W: Write + Send> ArrowWriter<W> {
         let row_group_writer_factory =
             ArrowRowGroupWriterFactory::new(&file_writer, arrow_schema.clone());
 
+        let cdc_chunkers = props_ptr
+            .content_defined_chunking()
+            .map(|opts| {
+                file_writer
+                    .schema_descr()
+                    .columns()
+                    .iter()
+                    .map(|desc| ContentDefinedChunker::new(desc, opts))
+                    .collect::<Result<Vec<_>>>()
+            })
+            .transpose()?;
+
         Ok(Self {
             writer: file_writer,
             in_progress: None,
@@ -268,6 +285,7 @@ impl<W: Write + Send> ArrowWriter<W> {
             row_group_writer_factory,
             max_row_group_row_count,
             max_row_group_bytes,
+            cdc_chunkers,
         })
     }
@@ -383,7 +401,10 @@ impl<W: Write + Send> ArrowWriter<W> {
             }
         }
 
-        in_progress.write(batch)?;
+        match self.cdc_chunkers.as_mut() {
+            Some(chunkers) => in_progress.write_with_chunkers(batch, chunkers)?,
+            None => in_progress.write(batch)?,
+        }
 
         let should_flush = self
             .max_row_group_row_count
@@ -869,20 +890,50 @@ enum ArrowColumnWriterImpl {
 impl ArrowColumnWriter {
     /// Write an [`ArrowLeafColumn`]
     pub fn write(&mut self, col: &ArrowLeafColumn) -> Result<()> {
+        self.write_internal(&col.0)
+    }
+
+    /// Write with content-defined chunking, inserting page flushes at chunk boundaries.
+    fn write_with_chunker(
+        &mut self,
+        col: &ArrowLeafColumn,
+        chunker: &mut ContentDefinedChunker,
+    ) -> Result<()> {
+        let levels = &col.0;
+        let chunks =
+            chunker.get_arrow_chunks(levels.def_levels(), levels.rep_levels(), levels.array())?;
+
+        let num_chunks = chunks.len();
+        for (i, chunk) in chunks.iter().enumerate() {
+            let chunk_levels = levels.slice_for_chunk(chunk);
+            self.write_internal(&chunk_levels)?;
+
+            // Add a page break after each chunk except the last
+            if i + 1 < num_chunks {
+                match &mut self.writer {
+                    ArrowColumnWriterImpl::Column(c) => c.add_data_page()?,
+                    ArrowColumnWriterImpl::ByteArray(c) => c.add_data_page()?,
+                }
+            }
+        }
+        Ok(())
+    }
+
+    fn write_internal(&mut self, levels: &ArrayLevels) -> Result<()> {
         match &mut self.writer {
             ArrowColumnWriterImpl::Column(c) => {
-                let leaf = col.0.array();
+                let leaf = levels.array();
                 match leaf.as_any_dictionary_opt() {
                     Some(dictionary) => {
                         let materialized =
                             arrow_select::take::take(dictionary.values(), dictionary.keys(), None)?;
-                        write_leaf(c, &materialized, &col.0)?
+                        write_leaf(c, &materialized, levels)?
                     }
-                    None => write_leaf(c, leaf, &col.0)?,
+                    None => write_leaf(c, leaf, levels)?,
                 };
             }
             ArrowColumnWriterImpl::ByteArray(c) => {
-                write_primitive(c, col.0.array().as_ref(), &col.0)?;
+                write_primitive(c, levels.array().as_ref(), levels)?;
             }
         }
         Ok(())
@@ -958,7 +1009,26 @@ impl ArrowRowGroupWriter {
         let mut writers = self.writers.iter_mut();
         for (field, column) in self.schema.fields().iter().zip(batch.columns()) {
             for leaf in compute_leaves(field.as_ref(), column)? {
-                writers.next().unwrap().write(&leaf)?
+                writers.next().unwrap().write(&leaf)?;
+            }
+        }
+        Ok(())
+    }
+
+    fn write_with_chunkers(
+        &mut self,
+        batch: &RecordBatch,
+        chunkers: &mut [ContentDefinedChunker],
+    ) -> Result<()> {
+        self.buffered_rows += batch.num_rows();
+        let mut writers = self.writers.iter_mut();
+        let mut chunkers = chunkers.iter_mut();
+        for (field, column) in self.schema.fields().iter().zip(batch.columns()) {
+            for leaf in compute_leaves(field.as_ref(), column)? {
+                writers
+                    .next()
+                    .unwrap()
+                    .write_with_chunker(&leaf, chunkers.next().unwrap())?;
+            }
+        }
+        Ok(())
+    }
diff --git a/parquet/src/arrow/buffer/view_buffer.rs b/parquet/src/arrow/buffer/view_buffer.rs
index 1cccfd0f1b20..a93674663f7b 100644
--- a/parquet/src/arrow/buffer/view_buffer.rs
+++ b/parquet/src/arrow/buffer/view_buffer.rs
@@ -17,7 +17,7 @@
 use crate::arrow::record_reader::buffer::ValuesBuffer;
 use arrow_array::{ArrayRef, BinaryViewArray, StringViewArray};
-use arrow_buffer::{BooleanBuffer, Buffer, NullBuffer, ScalarBuffer};
+use arrow_buffer::{Buffer, NullBuffer, ScalarBuffer};
 use arrow_schema::DataType as ArrowType;
 use std::sync::Arc;
 
@@ -56,9 +56,7 @@ impl ViewBuffer {
     pub fn into_array(self, null_buffer: Option<Buffer>, data_type: &ArrowType) -> ArrayRef {
         let len = self.views.len();
         let views = ScalarBuffer::from(self.views);
-        let nulls = null_buffer
-            .map(|b| NullBuffer::new(BooleanBuffer::new(b, 0, len)))
-            .filter(|n| n.null_count() != 0);
+        let nulls = null_buffer.and_then(|b| NullBuffer::from_unsliced_buffer(b, len));
 
         match data_type {
             ArrowType::Utf8View => {
                 // Safety: views were created correctly, and checked that the data is utf8 when building the buffer
diff --git a/parquet/src/arrow/push_decoder/reader_builder/mod.rs b/parquet/src/arrow/push_decoder/reader_builder/mod.rs
index 8fa299be884f..d3d78ca7c263 100644
--- a/parquet/src/arrow/push_decoder/reader_builder/mod.rs
+++ b/parquet/src/arrow/push_decoder/reader_builder/mod.rs
@@ -437,6 +437,13 @@ impl RowGroupReaderBuilder {
                 .with_parquet_metadata(&self.metadata)
                 .build_array_reader(self.fields.as_deref(), predicate.projection())?;
 
+            // Reset to original policy before each predicate so the override
+            // can detect page skipping for THIS predicate's columns.
+            // Without this reset, a prior predicate's override (e.g. Mask)
+            // carries forward and the check returns early, missing unfetched
+            // pages for subsequent predicates.
+            plan_builder = plan_builder.with_row_selection_policy(self.row_selection_policy);
+
             // Prepare to evaluate the filter.
             // Note: first update the selection strategy to properly handle any pages
             // pruned during fetch
diff --git a/parquet/src/bloom_filter/mod.rs b/parquet/src/bloom_filter/mod.rs
index 1f77e492ccf1..933b5a269fff 100644
--- a/parquet/src/bloom_filter/mod.rs
+++ b/parquet/src/bloom_filter/mod.rs
@@ -232,8 +232,10 @@ fn read_bloom_filter_header_and_length_from_bytes(
     Ok((header, (total_length - prot.as_slice().len()) as u64))
 }
 
-pub(crate) const BITSET_MIN_LENGTH: usize = 32;
-pub(crate) const BITSET_MAX_LENGTH: usize = 128 * 1024 * 1024;
+/// The minimum number of bytes for a bloom filter bitset.
+pub const BITSET_MIN_LENGTH: usize = 32;
+/// The maximum number of bytes for a bloom filter bitset.
+pub const BITSET_MAX_LENGTH: usize = 128 * 1024 * 1024;
 
 #[inline]
 fn optimal_num_of_bytes(num_bytes: usize) -> usize {
@@ -255,7 +257,7 @@ fn num_of_bits_from_ndv_fpp(ndv: u64, fpp: f64) -> usize {
 impl Sbbf {
     /// Create a new [Sbbf] with given number of distinct values and false positive probability.
     /// Will return an error if `fpp` is greater than or equal to 1.0 or less than 0.0.
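+    /// (Editor's illustration of the newly public API; not part of this
+    /// change:)
+    ///
+    /// ```ignore
+    /// // size a filter for ~1M distinct values at a 1% false-positive rate
+    /// let sbbf = Sbbf::new_with_ndv_fpp(1_000_000, 0.01)?;
+    /// ```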
-    pub(crate) fn new_with_ndv_fpp(ndv: u64, fpp: f64) -> Result<Self> {
+    pub fn new_with_ndv_fpp(ndv: u64, fpp: f64) -> Result<Self> {
         if !(0.0..1.0).contains(&fpp) {
             return Err(ParquetError::General(format!(
                 "False positive probability must be between 0.0 and 1.0, got {fpp}"
@@ -267,7 +269,7 @@ impl Sbbf {
 
     /// Create a new [Sbbf] with given number of bytes, the exact number of bytes will be adjusted
     /// to the next power of two bounded by [BITSET_MIN_LENGTH] and [BITSET_MAX_LENGTH].
-    pub(crate) fn new_with_num_of_bytes(num_bytes: usize) -> Self {
+    pub fn new_with_num_of_bytes(num_bytes: usize) -> Self {
         let num_bytes = optimal_num_of_bytes(num_bytes);
         assert_eq!(num_bytes % size_of::<Block>(), 0);
         let num_blocks = num_bytes / size_of::<Block>();
@@ -275,7 +277,8 @@ impl Sbbf {
         Self(bitset)
     }
 
-    pub(crate) fn new(bitset: &[u8]) -> Self {
+    /// Creates a new [Sbbf] from a raw byte slice.
+    pub fn new(bitset: &[u8]) -> Self {
         let data = bitset
             .chunks_exact(4 * 8)
             .map(|chunk| {
@@ -304,7 +307,7 @@ impl Sbbf {
 
     /// Write the bitset in serialized form to the writer.
     #[cfg(not(target_endian = "little"))]
-    fn write_bitset<W: Write>(&self, mut writer: W) -> Result<(), ParquetError> {
+    pub fn write_bitset<W: Write>(&self, mut writer: W) -> Result<(), ParquetError> {
         for block in &self.0 {
             writer
                 .write_all(block.to_le_bytes().as_slice())
@@ -317,7 +320,7 @@ impl Sbbf {
 
     /// Write the bitset in serialized form to the writer.
     #[cfg(target_endian = "little")]
-    fn write_bitset<W: Write>(&self, mut writer: W) -> Result<(), ParquetError> {
+    pub fn write_bitset<W: Write>(&self, mut writer: W) -> Result<(), ParquetError> {
         // Safety: Block is repr(transparent) and [u32; 8] can be reinterpreted as [u8; 32].
         let slice = unsafe {
             std::slice::from_raw_parts(
@@ -411,7 +414,7 @@ impl Sbbf {
     }
 
     /// Check if an [AsBytes] value is probably present or definitely absent in the filter
-    pub fn check<T: AsBytes>(&self, value: &T) -> bool {
+    pub fn check<T: AsBytes + ?Sized>(&self, value: &T) -> bool {
         self.check_hash(hash_as_bytes(value))
     }
diff --git a/parquet/src/column/chunker/cdc.rs b/parquet/src/column/chunker/cdc.rs
new file mode 100644
index 000000000000..750735730874
--- /dev/null
+++ b/parquet/src/column/chunker/cdc.rs
@@ -0,0 +1,2331 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use crate::errors::{ParquetError, Result};
+use crate::file::properties::CdcOptions;
+use crate::schema::types::ColumnDescriptor;
+
+use super::CdcChunk;
+use super::cdc_generated::{GEARHASH_TABLE, NUM_GEARHASH_TABLES};
+
+/// CDC (Content-Defined Chunking) divides data into variable-sized chunks based on
+/// content rather than fixed-size boundaries.
+/// +/// For example, given this sequence of values in a column: +/// +/// ```text +/// File1: [1,2,3, 4,5,6, 7,8,9] +/// chunk1 chunk2 chunk3 +/// ``` +/// +/// If a value is inserted between 3 and 4: +/// +/// ```text +/// File2: [1,2,3,0, 4,5,6, 7,8,9] +/// new-chunk chunk2 chunk3 +/// ``` +/// +/// The chunking process adjusts to maintain stable boundaries across data modifications. +/// Each chunk defines a new parquet data page which is contiguously written to the file. +/// Since each page is compressed independently, the files' contents look like: +/// +/// ```text +/// File1: [Page1][Page2][Page3]... +/// File2: [Page4][Page2][Page3]... +/// ``` +/// +/// When uploaded to a content-addressable storage (CAS) system, the CAS splits the byte +/// stream into content-defined blobs with unique identifiers. Identical blobs are stored +/// only once, so Page2 and Page3 are deduplicated across File1 and File2. +/// +/// ## Implementation +/// +/// Only the parquet writer needs to be aware of content-defined chunking; the reader is +/// unaffected. Each parquet column writer holds a `ContentDefinedChunker` instance +/// depending on the writer's properties. The chunker's state is maintained across the +/// entire column without being reset between pages and row groups. +/// +/// This implements a [FastCDC]-inspired algorithm using gear hashing. The input data is +/// fed byte-by-byte into a rolling hash; when the hash matches a predefined mask, a new +/// chunk boundary candidate is recorded. To reduce the exponential variance of chunk +/// sizes inherent in a single gear hash, the algorithm requires **8 consecutive mask +/// matches** — each against a different pre-computed gear hash table — before committing +/// to a boundary. This [central-limit-theorem normalization] makes the chunk size +/// distribution approximately normal between `min_chunk_size` and `max_chunk_size`. +/// +/// The chunker receives the record-shredded column data (def_levels, rep_levels, values) +/// and iterates over the (def_level, rep_level, value) triplets while adjusting the +/// column-global rolling hash. Whenever the rolling hash matches, the chunker creates a +/// new chunk. For nested data (lists, maps, structs) chunk boundaries are restricted to +/// top-level record boundaries (`rep_level == 0`) so that a nested row is never split +/// across chunks. +/// +/// Note that boundaries are deterministically calculated exclusively based on the data +/// itself, so the same data always produces the same chunks given the same configuration. +/// +/// Ported from the C++ implementation in apache/arrow#45360 +/// (`cpp/src/parquet/chunker_internal.cc`). +/// +/// [FastCDC]: https://www.usenix.org/conference/atc16/technical-sessions/presentation/xia +/// [central-limit-theorem normalization]: https://www.cidrdb.org/cidr2023/papers/p43-low.pdf +#[derive(Debug)] +pub(crate) struct ContentDefinedChunker { + /// Maximum definition level for this column. + max_def_level: i16, + /// Maximum repetition level for this column. + max_rep_level: i16, + /// Definition level at the nearest REPEATED ancestor. + repeated_ancestor_def_level: i16, + + /// Minimum chunk size in bytes. + /// The rolling hash will not be updated until this size is reached for each chunk. + /// All data sent through the hash function counts towards the chunk size, including + /// definition and repetition levels if present. + min_chunk_size: i64, + /// Maximum chunk size in bytes. 
+ /// A new chunk is created whenever the chunk size exceeds this value. The chunk size + /// distribution approximates a normal distribution between `min_chunk_size` and + /// `max_chunk_size`. Note that the parquet writer has a related `data_pagesize` + /// property that controls the maximum size of a parquet data page after encoding. + /// While setting `data_pagesize` smaller than `max_chunk_size` doesn't affect + /// chunking effectiveness, it results in more small parquet data pages. + max_chunk_size: i64, + /// Mask for matching against the rolling hash. + rolling_hash_mask: u64, + + /// Rolling hash state, never reset — initialized once for the entire column. + rolling_hash: u64, + /// Whether the rolling hash has matched the mask since the last chunk boundary. + has_matched: bool, + /// Current run count for the central-limit-theorem normalization. + nth_run: usize, + /// Current chunk size in bytes. + chunk_size: i64, +} + +impl ContentDefinedChunker { + pub fn new(desc: &ColumnDescriptor, options: &CdcOptions) -> Result { + let rolling_hash_mask = Self::calculate_mask( + options.min_chunk_size as i64, + options.max_chunk_size as i64, + options.norm_level, + )?; + Ok(Self { + max_def_level: desc.max_def_level(), + max_rep_level: desc.max_rep_level(), + repeated_ancestor_def_level: desc.repeated_ancestor_def_level(), + min_chunk_size: options.min_chunk_size as i64, + max_chunk_size: options.max_chunk_size as i64, + rolling_hash_mask, + rolling_hash: 0, + has_matched: false, + nth_run: 0, + chunk_size: 0, + }) + } + + /// Calculate the mask used to determine chunk boundaries from the rolling hash. + /// + /// The mask is calculated so that the expected chunk size distribution approximates + /// a normal distribution between min and max chunk sizes. + fn calculate_mask(min_chunk_size: i64, max_chunk_size: i64, norm_level: i32) -> Result { + if min_chunk_size < 0 { + return Err(ParquetError::General( + "min_chunk_size must be non-negative".to_string(), + )); + } + if max_chunk_size <= min_chunk_size { + return Err(ParquetError::General( + "max_chunk_size must be greater than min_chunk_size".to_string(), + )); + } + + let avg_chunk_size = (min_chunk_size + max_chunk_size) / 2; + // Target size after subtracting the min-size skip window and dividing by the + // number of hash tables (for central-limit-theorem normalization). + let target_size = (avg_chunk_size - min_chunk_size) / NUM_GEARHASH_TABLES as i64; + + // floor(log2(target_size)) — equivalent to C++ NumRequiredBits(target_size) - 1 + let mask_bits = if target_size > 0 { + 63 - target_size.leading_zeros() as i32 + } else { + 0 + }; + + let effective_bits = mask_bits - norm_level; + + if !(1..=63).contains(&effective_bits) { + return Err(ParquetError::General(format!( + "The number of bits in the CDC mask must be between 1 and 63, got {effective_bits}" + ))); + } + + // Create the mask by setting the top `effective_bits` bits. + Ok(u64::MAX << (64 - effective_bits)) + } + + /// Feed raw bytes into the rolling hash. + /// + /// The byte count always accumulates toward `chunk_size`, but the actual hash + /// update is skipped until `min_chunk_size` has been reached. This "skip window" + /// is the FastCDC optimization that prevents boundaries from appearing too early + /// in a chunk. 
+    #[inline]
+    fn roll(&mut self, bytes: &[u8]) {
+        self.chunk_size += bytes.len() as i64;
+        if self.chunk_size < self.min_chunk_size {
+            return;
+        }
+        for &b in bytes {
+            self.rolling_hash = self
+                .rolling_hash
+                .wrapping_shl(1)
+                .wrapping_add(GEARHASH_TABLE[self.nth_run][b as usize]);
+            self.has_matched =
+                self.has_matched || ((self.rolling_hash & self.rolling_hash_mask) == 0);
+        }
+    }
+
+    /// Feed exactly `N` bytes into the rolling hash (compile-time width).
+    ///
+    /// Like [`roll`](Self::roll), but the byte count is known at compile time,
+    /// allowing the compiler to unroll the inner loop.
+    #[inline(always)]
+    fn roll_fixed<const N: usize>(&mut self, bytes: &[u8; N]) {
+        self.chunk_size += N as i64;
+        if self.chunk_size < self.min_chunk_size {
+            return;
+        }
+        for j in 0..N {
+            self.rolling_hash = self
+                .rolling_hash
+                .wrapping_shl(1)
+                .wrapping_add(GEARHASH_TABLE[self.nth_run][bytes[j] as usize]);
+            self.has_matched =
+                self.has_matched || ((self.rolling_hash & self.rolling_hash_mask) == 0);
+        }
+    }
+
+    /// Feed a definition or repetition level (i16) into the rolling hash.
+    #[inline]
+    fn roll_level(&mut self, level: i16) {
+        self.roll_fixed(&level.to_le_bytes());
+    }
+
+    /// Check whether a new chunk boundary should be created.
+    ///
+    /// A boundary is created when **either** of two conditions holds:
+    ///
+    /// 1. **CLT normalization**: The rolling hash has matched the mask (`has_matched`)
+    ///    *and* this is the 8th consecutive such match (`nth_run` reaches
+    ///    `NUM_GEARHASH_TABLES`). Each match advances to the next gear hash table, so
+    ///    8 independent matches are required. A single hash table would yield
+    ///    exponentially distributed chunk sizes; requiring 8 independent matches
+    ///    approximates a normal (Gaussian) distribution by the central limit theorem.
+    ///
+    /// 2. **Hard size limit**: `chunk_size` has reached `max_chunk_size`. This caps
+    ///    chunk size even if the CLT normalization sequence has not completed.
+    ///
+    /// Note: when `max_chunk_size` forces a boundary, `nth_run` is **not** reset, so
+    /// the CLT sequence continues from where it left off in the next chunk. This
+    /// matches the C++ behavior.
+    #[inline]
+    fn need_new_chunk(&mut self) -> bool {
+        if self.has_matched {
+            self.has_matched = false;
+            self.nth_run += 1;
+            if self.nth_run >= NUM_GEARHASH_TABLES {
+                self.nth_run = 0;
+                self.chunk_size = 0;
+                return true;
+            }
+        }
+        if self.chunk_size >= self.max_chunk_size {
+            self.chunk_size = 0;
+            return true;
+        }
+        false
+    }
+
+    /// Compute chunk boundaries for the given column data.
+    ///
+    /// The chunking state is maintained across the entire column without being
+    /// reset between pages and row groups. This enables the chunking process to
+    /// be continued between different write calls.
+    ///
+    /// We go over the (def_level, rep_level, value) triplets one by one while
+    /// adjusting the column-global rolling hash based on the triplet. Whenever
+    /// the rolling hash matches a predefined mask it sets `has_matched` to true.
+    ///
+    /// After each triplet [`need_new_chunk`](Self::need_new_chunk) is called to
+    /// evaluate if we need to create a new chunk.
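+    ///
+    /// (Editor's illustration) for an optional Int32 column holding
+    /// `[1, null, 2]` the triplets are `(def=1, 1)`, `(def=0, -)`,
+    /// `(def=1, 2)`: each level is hashed as 2 little-endian bytes via
+    /// `roll_level`, and each non-null value as its 4 value bytes.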
+    fn calculate<F>(
+        &mut self,
+        def_levels: Option<&[i16]>,
+        rep_levels: Option<&[i16]>,
+        num_levels: usize,
+        mut roll_value: F,
+    ) -> Vec<CdcChunk>
+    where
+        F: FnMut(&mut Self, usize),
+    {
+        let has_def_levels = self.max_def_level > 0;
+        let has_rep_levels = self.max_rep_level > 0;
+
+        let mut chunks = Vec::new();
+        let mut prev_offset: usize = 0;
+        let mut prev_value_offset: usize = 0;
+        let mut value_offset: usize = 0;
+
+        if !has_rep_levels && !has_def_levels {
+            // Fastest path: non-nested, non-null data.
+            // Every level corresponds to exactly one non-null value, so
+            // value_offset == level_offset and num_values == num_levels.
+            //
+            // Example: required Int32, array = [10, 20, 30]
+            //   level:        0   1   2
+            //   value_offset: 0   1   2
+            for offset in 0..num_levels {
+                roll_value(self, offset);
+                if self.need_new_chunk() {
+                    chunks.push(CdcChunk {
+                        level_offset: prev_offset,
+                        num_levels: offset - prev_offset,
+                        value_offset: prev_offset,
+                        num_values: offset - prev_offset,
+                    });
+                    prev_offset = offset;
+                }
+            }
+            prev_value_offset = prev_offset;
+            value_offset = num_levels;
+        } else if !has_rep_levels {
+            // Non-nested data with nulls. value_offset only increments for
+            // non-null values (def == max_def), so it diverges from the
+            // level offset when nulls are present.
+            //
+            // Example: optional Int32, array = [1, null, 2, null, 3]
+            //   def_levels:   [1, 0, 1, 0, 1]
+            //   level:         0  1  2  3  4
+            //   value_offset:  0     1     2   (only increments on def==1)
+            let def_levels = def_levels.expect("def_levels required when max_def_level > 0");
+            #[allow(clippy::needless_range_loop)]
+            for offset in 0..num_levels {
+                let def_level = def_levels[offset];
+                self.roll_level(def_level);
+                if def_level == self.max_def_level {
+                    roll_value(self, offset);
+                }
+                // Check boundary before incrementing value_offset so that
+                // num_values reflects only entries in the completed chunk.
+                if self.need_new_chunk() {
+                    chunks.push(CdcChunk {
+                        level_offset: prev_offset,
+                        num_levels: offset - prev_offset,
+                        value_offset: prev_value_offset,
+                        num_values: value_offset - prev_value_offset,
+                    });
+                    prev_offset = offset;
+                    prev_value_offset = value_offset;
+                }
+                if def_level == self.max_def_level {
+                    value_offset += 1;
+                }
+            }
+        } else {
+            // Nested data with nulls. Two counters are needed:
+            //
+            // leaf_offset: index into the leaf values array for hashing,
+            // incremented for all leaf slots (def >= repeated_ancestor_def_level),
+            // including null elements.
+            //
+            // value_offset: index into non_null_indices for chunk boundaries,
+            // incremented only for non-null leaf values (def == max_def_level).
+            //
+            // These diverge when nullable elements exist inside lists.
+            //
+            // Example: List with repeated_ancestor_def_level=2, max_def=3
+            //   row 0: [1, null, 2]   (3 leaf slots, 2 non-null)
+            //   row 1: [3]            (1 leaf slot, 1 non-null)
+            //
+            //   leaf array: [1, null, 2, 3]
+            //   def_levels: [3, 2, 3, 3]
+            //   rep_levels: [0, 1, 1, 0]
+            //
+            //   level  def  leaf_offset  value_offset  action
+            //   ─────  ───  ───────────  ────────────  ──────────────────────────
+            //   0      3    0            0             roll_value(0), value++, leaf++
+            //   1      2    1            1             leaf++ only (null element)
+            //   2      3    2            1             roll_value(2), value++, leaf++
+            //   3      3    3            2             roll_value(3), value++, leaf++
+            //
+            // roll_value(2) correctly indexes leaf array position 2 (value "2").
+            // Using value_offset=1 would index position 1 (the null slot).
+            //
+            // Using value_offset for roll_value would hash the wrong array slot.
+            let def_levels = def_levels.expect("def_levels required for nested data");
+            let rep_levels = rep_levels.expect("rep_levels required for nested data");
+            let mut leaf_offset: usize = 0;
+
+            for offset in 0..num_levels {
+                let def_level = def_levels[offset];
+                let rep_level = rep_levels[offset];
+
+                self.roll_level(def_level);
+                self.roll_level(rep_level);
+                if def_level == self.max_def_level {
+                    roll_value(self, leaf_offset);
+                }
+
+                // Check boundary before incrementing value_offset so that
+                // num_values reflects only entries in the completed chunk.
+                if rep_level == 0 && self.need_new_chunk() {
+                    let levels_to_write = offset - prev_offset;
+                    if levels_to_write > 0 {
+                        chunks.push(CdcChunk {
+                            level_offset: prev_offset,
+                            num_levels: levels_to_write,
+                            value_offset: prev_value_offset,
+                            num_values: value_offset - prev_value_offset,
+                        });
+                        prev_offset = offset;
+                        prev_value_offset = value_offset;
+                    }
+                }
+                if def_level == self.max_def_level {
+                    value_offset += 1;
+                }
+                if def_level >= self.repeated_ancestor_def_level {
+                    leaf_offset += 1;
+                }
+            }
+        }
+
+        // Add the last chunk if we have any levels left.
+        if prev_offset < num_levels {
+            chunks.push(CdcChunk {
+                level_offset: prev_offset,
+                num_levels: num_levels - prev_offset,
+                value_offset: prev_value_offset,
+                num_values: value_offset - prev_value_offset,
+            });
+        }
+
+        #[cfg(debug_assertions)]
+        self.validate_chunks(&chunks, num_levels, value_offset);
+
+        chunks
+    }
+
+    /// Compute CDC chunk boundaries by dispatching on the Arrow array's data type
+    /// to feed value bytes into the rolling hash.
+    #[cfg(feature = "arrow")]
+    pub(crate) fn get_arrow_chunks(
+        &mut self,
+        def_levels: Option<&[i16]>,
+        rep_levels: Option<&[i16]>,
+        array: &dyn arrow_array::Array,
+    ) -> Result<Vec<CdcChunk>> {
+        use arrow_array::cast::AsArray;
+        use arrow_schema::DataType;
+
+        let num_levels = match def_levels {
+            Some(def_levels) => def_levels.len(),
+            None => array.len(),
+        };
+
+        macro_rules! fixed_width {
+            ($N:literal) => {{
+                let data = array.to_data();
+                let buffer = data.buffers()[0].as_slice();
+                let values = &buffer[data.offset() * $N..];
+                self.calculate(def_levels, rep_levels, num_levels, |c, i| {
+                    let offset = i * $N;
+                    let slice = &values[offset..offset + $N];
+                    c.roll_fixed::<$N>(slice.try_into().unwrap());
+                })
+            }};
+        }
+
+        macro_rules! binary_like {
+            ($a:expr) => {{
+                let a = $a;
+                self.calculate(def_levels, rep_levels, num_levels, |c, i| {
+                    c.roll(a.value(i).as_ref());
+                })
+            }};
+        }
+
+        let dtype = array.data_type();
+        let chunks = match dtype {
+            DataType::Null => self.calculate(def_levels, rep_levels, num_levels, |_, _| {}),
+            DataType::Boolean => {
+                let a = array.as_boolean();
+                self.calculate(def_levels, rep_levels, num_levels, |c, i| {
+                    c.roll_fixed(&[a.value(i) as u8]);
+                })
+            }
+            DataType::Int8 | DataType::UInt8 => fixed_width!(1),
+            DataType::Int16 | DataType::UInt16 | DataType::Float16 => fixed_width!(2),
+            DataType::Int32
+            | DataType::UInt32
+            | DataType::Float32
+            | DataType::Date32
+            | DataType::Time32(_)
+            | DataType::Interval(arrow_schema::IntervalUnit::YearMonth)
+            | DataType::Decimal32(_, _) => fixed_width!(4),
+            DataType::Int64
+            | DataType::UInt64
+            | DataType::Float64
+            | DataType::Date64
+            | DataType::Time64(_)
+            | DataType::Timestamp(_, _)
+            | DataType::Duration(_)
+            | DataType::Interval(arrow_schema::IntervalUnit::DayTime)
+            | DataType::Decimal64(_, _) => fixed_width!(8),
+            DataType::Interval(arrow_schema::IntervalUnit::MonthDayNano)
+            | DataType::Decimal128(_, _) => fixed_width!(16),
+            DataType::Decimal256(_, _) => fixed_width!(32),
+            DataType::FixedSizeBinary(_) => binary_like!(array.as_fixed_size_binary()),
+            DataType::Binary => binary_like!(array.as_binary::<i32>()),
+            DataType::LargeBinary => binary_like!(array.as_binary::<i64>()),
+            DataType::Utf8 => binary_like!(array.as_string::<i32>()),
+            DataType::LargeUtf8 => binary_like!(array.as_string::<i64>()),
+            DataType::BinaryView => binary_like!(array.as_binary_view()),
+            DataType::Utf8View => binary_like!(array.as_string_view()),
+            DataType::Dictionary(_, _) => {
+                let dict = array.as_any_dictionary();
+                self.get_arrow_chunks(def_levels, rep_levels, dict.keys())?
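+                // (Editor's note) the key array has one entry per logical
+                // element, so level alignment is preserved; boundaries are
+                // computed from the key bytes, not the decoded values.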
+ } + _ => { + return Err(ParquetError::General(format!( + "content-defined chunking is not supported for data type {dtype:?}", + ))); + } + }; + Ok(chunks) + } + + #[cfg(debug_assertions)] + fn validate_chunks(&self, chunks: &[CdcChunk], num_levels: usize, total_values: usize) { + assert!(!chunks.is_empty(), "chunks must be non-empty"); + + let first = &chunks[0]; + assert_eq!(first.level_offset, 0, "first chunk must start at level 0"); + assert_eq!(first.value_offset, 0, "first chunk must start at value 0"); + + let mut sum_levels = first.num_levels; + let mut sum_values = first.num_values; + for i in 1..chunks.len() { + let chunk = &chunks[i]; + let prev = &chunks[i - 1]; + assert!(chunk.num_levels > 0, "chunk must have levels"); + assert_eq!( + chunk.level_offset, + prev.level_offset + prev.num_levels, + "level offsets must be contiguous" + ); + assert_eq!( + chunk.value_offset, + prev.value_offset + prev.num_values, + "value offsets must be contiguous" + ); + sum_levels += chunk.num_levels; + sum_values += chunk.num_values; + } + assert_eq!(sum_levels, num_levels, "chunks must cover all levels"); + assert_eq!(sum_values, total_values, "chunks must cover all values"); + + let last = chunks.last().unwrap(); + assert_eq!( + last.level_offset + last.num_levels, + num_levels, + "last chunk must end at num_levels" + ); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::basic::Type as PhysicalType; + use crate::schema::types::{ColumnPath, Type}; + use std::sync::Arc; + + fn make_desc(max_def_level: i16, max_rep_level: i16) -> ColumnDescriptor { + let tp = Type::primitive_type_builder("col", PhysicalType::INT32) + .build() + .unwrap(); + ColumnDescriptor::new( + Arc::new(tp), + max_def_level, + max_rep_level, + ColumnPath::new(vec![]), + ) + } + + #[test] + fn test_calculate_mask_defaults() { + let mask = ContentDefinedChunker::calculate_mask(256 * 1024, 1024 * 1024, 0).unwrap(); + // avg = 640 KiB, target = (640-256)*1024/8 = 49152, log2(49152) = 15 + // mask = u64::MAX << (64 - 15) = top 15 bits set + let expected = u64::MAX << (64 - 15); + assert_eq!(mask, expected); + } + + #[test] + fn test_calculate_mask_with_norm_level() { + let mask = ContentDefinedChunker::calculate_mask(256 * 1024, 1024 * 1024, 1).unwrap(); + let expected = u64::MAX << (64 - 14); + assert_eq!(mask, expected); + } + + #[test] + fn test_calculate_mask_invalid() { + assert!(ContentDefinedChunker::calculate_mask(-1, 100, 0).is_err()); + assert!(ContentDefinedChunker::calculate_mask(100, 50, 0).is_err()); + assert!(ContentDefinedChunker::calculate_mask(100, 100, 0).is_err()); + } + + #[test] + fn test_non_nested_non_null_single_chunk() { + let options = CdcOptions { + min_chunk_size: 8, + max_chunk_size: 1024, + norm_level: 0, + }; + let mut chunker = ContentDefinedChunker::new(&make_desc(0, 0), &options).unwrap(); + + // Write a small amount of data — should produce exactly 1 chunk. + let num_values = 4; + let chunks = chunker.calculate(None, None, num_values, |c, i| { + c.roll_fixed::<4>(&(i as i32).to_le_bytes()); + }); + assert_eq!(chunks.len(), 1); + assert_eq!(chunks[0].level_offset, 0); + assert_eq!(chunks[0].value_offset, 0); + assert_eq!(chunks[0].num_levels, 4); + } + + #[test] + fn test_max_chunk_size_forces_boundary() { + let options = CdcOptions { + min_chunk_size: 256, + max_chunk_size: 1024, + norm_level: 0, + }; + let mut chunker = ContentDefinedChunker::new(&make_desc(0, 0), &options).unwrap(); + + // Write enough data to exceed max_chunk_size multiple times. 
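+        // (Editor's note) with max_chunk_size = 1024, the hard size cap alone
+        // guarantees multiple boundaries here, even if the gear hash never
+        // matches the mask.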
+        // Each i32 = 4 bytes, max_chunk_size=1024, so ~256 values per chunk max.
+        let num_values = 2000;
+        let chunks = chunker.calculate(None, None, num_values, |c, i| {
+            c.roll_fixed::<4>(&(i as i32).to_le_bytes());
+        });
+
+        // Should have multiple chunks
+        assert!(chunks.len() > 1);
+
+        // Verify contiguity
+        let mut total_levels = 0;
+        for (i, chunk) in chunks.iter().enumerate() {
+            assert_eq!(chunk.level_offset, total_levels);
+            if i < chunks.len() - 1 {
+                assert!(chunk.num_levels > 0);
+            }
+            total_levels += chunk.num_levels;
+        }
+        assert_eq!(total_levels, num_values);
+    }
+
+    #[test]
+    fn test_deterministic_chunks() {
+        let options = CdcOptions {
+            min_chunk_size: 4,
+            max_chunk_size: 64,
+            norm_level: 0,
+        };
+
+        let roll = |c: &mut ContentDefinedChunker, i: usize| {
+            c.roll_fixed::<8>(&(i as i64).to_le_bytes());
+        };
+
+        let mut chunker1 = ContentDefinedChunker::new(&make_desc(0, 0), &options).unwrap();
+        let chunks1 = chunker1.calculate(None, None, 200, roll);
+
+        let mut chunker2 = ContentDefinedChunker::new(&make_desc(0, 0), &options).unwrap();
+        let chunks2 = chunker2.calculate(None, None, 200, roll);
+
+        assert_eq!(chunks1.len(), chunks2.len());
+        for (a, b) in chunks1.iter().zip(chunks2.iter()) {
+            assert_eq!(a.level_offset, b.level_offset);
+            assert_eq!(a.num_levels, b.num_levels);
+            assert_eq!(a.value_offset, b.value_offset);
+            assert_eq!(a.num_values, b.num_values);
+        }
+    }
+
+    #[test]
+    fn test_nullable_non_nested() {
+        let options = CdcOptions {
+            min_chunk_size: 4,
+            max_chunk_size: 64,
+            norm_level: 0,
+        };
+        let mut chunker = ContentDefinedChunker::new(&make_desc(1, 0), &options).unwrap();
+
+        let num_levels = 20;
+        // def_level=1 means non-null, def_level=0 means null
+        let def_levels: Vec<i16> = (0..num_levels)
+            .map(|i| if i % 3 == 0 { 0 } else { 1 })
+            .collect();
+
+        let chunks = chunker.calculate(Some(&def_levels), None, num_levels, |c, i| {
+            c.roll_fixed::<4>(&(i as i32).to_le_bytes());
+        });
+
+        assert!(!chunks.is_empty());
+        let total: usize = chunks.iter().map(|c| c.num_levels).sum();
+        assert_eq!(total, num_levels);
+    }
+}
+
+/// Integration tests that exercise CDC through the Arrow writer/reader roundtrip.
+/// Ported from the C++ test suite in `chunker_internal_test.cc`.
+#[cfg(all(test, feature = "arrow"))]
+mod arrow_tests {
+    use std::borrow::Borrow;
+    use std::sync::Arc;
+
+    use arrow::util::data_gen::create_random_batch;
+    use arrow_array::cast::AsArray;
+    use arrow_array::{Array, ArrayRef, BooleanArray, Int32Array, RecordBatch};
+    use arrow_buffer::Buffer;
+    use arrow_data::ArrayData;
+    use arrow_schema::{DataType, Field, Fields, Schema};
+
+    use crate::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
+    use crate::arrow::arrow_writer::ArrowWriter;
+    use crate::file::properties::{CdcOptions, WriterProperties};
+    use crate::file::reader::{FileReader, SerializedFileReader};
+
+    // --- Constants matching C++ TestCDCSingleRowGroup ---
+
+    const CDC_MIN_CHUNK_SIZE: usize = 4 * 1024;
+    const CDC_MAX_CHUNK_SIZE: usize = 16 * 1024;
+    const CDC_PART_SIZE: usize = 128 * 1024;
+    const CDC_EDIT_SIZE: usize = 128;
+    const CDC_ROW_GROUP_LENGTH: usize = 1024 * 1024;
+
+    // --- Helpers ---
+
+    /// Deterministic hash function matching the C++ test generator.
+ fn test_hash(seed: u64, index: u64) -> u64 { + let mut h = (index.wrapping_add(seed)).wrapping_mul(0xc4ceb9fe1a85ec53u64); + h ^= h >> 33; + h = h.wrapping_mul(0xff51afd7ed558ccdu64); + h ^= h >> 33; + h = h.wrapping_mul(0xc4ceb9fe1a85ec53u64); + h ^= h >> 33; + h + } + + /// Generate a deterministic array for any supported data type, matching C++ `GenerateArray`. + fn generate_array(dtype: &DataType, nullable: bool, length: usize, seed: u64) -> ArrayRef { + macro_rules! gen_primitive { + ($array_type:ty, $cast:expr) => {{ + if nullable { + let arr: $array_type = (0..length) + .map(|i| { + let val = test_hash(seed, i as u64); + if val % 10 == 0 { + None + } else { + Some($cast(val)) + } + }) + .collect(); + Arc::new(arr) as ArrayRef + } else { + let arr: $array_type = (0..length) + .map(|i| Some($cast(test_hash(seed, i as u64)))) + .collect(); + Arc::new(arr) as ArrayRef + } + }}; + } + + match dtype { + DataType::Boolean => { + if nullable { + let arr: BooleanArray = (0..length) + .map(|i| { + let val = test_hash(seed, i as u64); + if val % 10 == 0 { + None + } else { + Some(val % 2 == 0) + } + }) + .collect(); + Arc::new(arr) + } else { + let arr: BooleanArray = (0..length) + .map(|i| Some(test_hash(seed, i as u64) % 2 == 0)) + .collect(); + Arc::new(arr) + } + } + DataType::Int32 => gen_primitive!(Int32Array, |v: u64| v as i32), + DataType::Int64 => { + gen_primitive!(arrow_array::Int64Array, |v: u64| v as i64) + } + DataType::Float64 => { + gen_primitive!(arrow_array::Float64Array, |v: u64| (v % 100000) as f64 + / 1000.0) + } + DataType::Utf8 => { + let arr: arrow_array::StringArray = if nullable { + (0..length) + .map(|i| { + let val = test_hash(seed, i as u64); + if val % 10 == 0 { + None + } else { + Some(format!("str_{val}")) + } + }) + .collect() + } else { + (0..length) + .map(|i| Some(format!("str_{}", test_hash(seed, i as u64)))) + .collect() + }; + Arc::new(arr) + } + DataType::Binary => { + let arr: arrow_array::BinaryArray = if nullable { + (0..length) + .map(|i| { + let val = test_hash(seed, i as u64); + if val % 10 == 0 { + None + } else { + Some(format!("bin_{val}").into_bytes()) + } + }) + .collect() + } else { + (0..length) + .map(|i| Some(format!("bin_{}", test_hash(seed, i as u64)).into_bytes())) + .collect() + }; + Arc::new(arr) + } + DataType::FixedSizeBinary(size) => { + let size = *size; + let mut builder = arrow_array::builder::FixedSizeBinaryBuilder::new(size); + for i in 0..length { + let val = test_hash(seed, i as u64); + if nullable && val % 10 == 0 { + builder.append_null(); + } else { + let s = format!("bin_{val}"); + let bytes = s.as_bytes(); + let mut buf = vec![0u8; size as usize]; + let copy_len = bytes.len().min(size as usize); + buf[..copy_len].copy_from_slice(&bytes[..copy_len]); + builder.append_value(&buf).unwrap(); + } + } + Arc::new(builder.finish()) + } + DataType::Date32 => { + gen_primitive!(arrow_array::Date32Array, |v: u64| v as i32) + } + DataType::Timestamp(arrow_schema::TimeUnit::Nanosecond, _) => { + gen_primitive!(arrow_array::TimestampNanosecondArray, |v: u64| v as i64) + } + _ => panic!("Unsupported test data type: {dtype:?}"), + } + } + + /// Generate a RecordBatch with the given schema, matching C++ `GenerateTable`. 
+    fn generate_table(schema: &Arc<Schema>, length: usize, seed: u64) -> RecordBatch {
+        let arrays: Vec<ArrayRef> = schema
+            .fields()
+            .iter()
+            .enumerate()
+            .map(|(i, field)| {
+                generate_array(
+                    field.data_type(),
+                    field.is_nullable(),
+                    length,
+                    seed + i as u64 * 10,
+                )
+            })
+            .collect();
+        RecordBatch::try_new(schema.clone(), arrays).unwrap()
+    }
+
+    /// Compute the CDC byte width for a data type, matching C++ `bytes_per_record`.
+    /// Returns 0 for variable-length types.
+    fn cdc_byte_width(dtype: &DataType) -> usize {
+        match dtype {
+            DataType::Boolean => 1,
+            DataType::Int8 | DataType::UInt8 => 1,
+            DataType::Int16 | DataType::UInt16 | DataType::Float16 => 2,
+            DataType::Int32
+            | DataType::UInt32
+            | DataType::Float32
+            | DataType::Date32
+            | DataType::Time32(_) => 4,
+            DataType::Int64
+            | DataType::UInt64
+            | DataType::Float64
+            | DataType::Date64
+            | DataType::Time64(_)
+            | DataType::Timestamp(_, _)
+            | DataType::Duration(_) => 8,
+            DataType::Decimal128(_, _) => 16,
+            DataType::Decimal256(_, _) => 32,
+            DataType::FixedSizeBinary(n) => *n as usize,
+            _ => 0, // variable-length
+        }
+    }
+
+    /// Compute bytes_per_record for determining part/edit lengths, matching C++.
+    fn bytes_per_record(dtype: &DataType, nullable: bool) -> usize {
+        let bw = cdc_byte_width(dtype);
+        if bw > 0 {
+            if nullable { bw + 2 } else { bw }
+        } else {
+            16 // variable-length fallback, matching C++
+        }
+    }
+
+    /// Compute the CDC chunk size for an array slice, matching C++ `CalculateCdcSize`.
+    fn calculate_cdc_size(array: &dyn Array, nullable: bool) -> i64 {
+        let dtype = array.data_type();
+        let bw = cdc_byte_width(dtype);
+        let result = if bw > 0 {
+            // Fixed-width: count only non-null values
+            let valid_count = array.len() - array.null_count();
+            (valid_count * bw) as i64
+        } else {
+            // Variable-length: sum of actual byte lengths
+            match dtype {
+                DataType::Utf8 => {
+                    let a = array.as_string::<i32>();
+                    (0..a.len())
+                        .filter(|&i| a.is_valid(i))
+                        .map(|i| a.value(i).len() as i64)
+                        .sum()
+                }
+                DataType::Binary => {
+                    let a = array.as_binary::<i32>();
+                    (0..a.len())
+                        .filter(|&i| a.is_valid(i))
+                        .map(|i| a.value(i).len() as i64)
+                        .sum()
+                }
+                DataType::LargeBinary => {
+                    let a = array.as_binary::<i64>();
+                    (0..a.len())
+                        .filter(|&i| a.is_valid(i))
+                        .map(|i| a.value(i).len() as i64)
+                        .sum()
+                }
+                _ => panic!("CDC size calculation not implemented for {dtype:?}"),
+            }
+        };
+
+        if nullable {
+            // Add 2 bytes per element for definition levels
+            result + array.len() as i64 * 2
+        } else {
+            result
+        }
+    }
+
+    /// Page-level metadata for a single column within a row group.
+    struct ColumnInfo {
+        page_lengths: Vec<i64>,
+        has_dictionary_page: bool,
+    }
+
+    /// Extract per-row-group column info from Parquet data.
+    fn get_column_info(data: &[u8], column_index: usize) -> Vec<ColumnInfo> {
+        let reader = SerializedFileReader::new(bytes::Bytes::from(data.to_vec())).unwrap();
+        let metadata = reader.metadata();
+        let mut result = Vec::new();
+        for rg in 0..metadata.num_row_groups() {
+            let rg_reader = reader.get_row_group(rg).unwrap();
+            let col_reader = rg_reader.get_column_page_reader(column_index).unwrap();
+            let mut info = ColumnInfo {
+                page_lengths: Vec::new(),
+                has_dictionary_page: false,
+            };
+            for page in col_reader {
+                let page = page.unwrap();
+                match page.page_type() {
+                    crate::basic::PageType::DATA_PAGE | crate::basic::PageType::DATA_PAGE_V2 => {
+                        info.page_lengths.push(page.num_values() as i64);
+                    }
+                    crate::basic::PageType::DICTIONARY_PAGE => {
+                        info.has_dictionary_page = true;
+                    }
+                    _ => {}
+                }
+            }
+            result.push(info);
+        }
+        result
+    }
+
+    /// Assert that CDC chunk sizes are within the expected range.
+    /// Equivalent to C++ `AssertContentDefinedChunkSizes`.
+    fn assert_cdc_chunk_sizes(
+        array: &ArrayRef,
+        info: &ColumnInfo,
+        nullable: bool,
+        min_chunk_size: usize,
+        max_chunk_size: usize,
+        expect_dictionary_page: bool,
+    ) {
+        // Boolean and FixedSizeBinary never produce dictionary pages (matching C++)
+        let expect_dict = match array.data_type() {
+            DataType::Boolean | DataType::FixedSizeBinary(_) => false,
+            _ => expect_dictionary_page,
+        };
+        assert_eq!(
+            info.has_dictionary_page,
+            expect_dict,
+            "dictionary page mismatch for {:?}",
+            array.data_type()
+        );
+
+        let page_lengths = &info.page_lengths;
+        assert!(
+            page_lengths.len() > 1,
+            "CDC should produce multiple pages, got {page_lengths:?}"
+        );
+
+        let bw = cdc_byte_width(array.data_type());
+        // Only do exact CDC size validation for fixed-width and base binary-like types
+        if bw > 0
+            || matches!(
+                array.data_type(),
+                DataType::Utf8 | DataType::Binary | DataType::LargeBinary
+            )
+        {
+            let mut offset = 0i64;
+            for (i, &page_len) in page_lengths.iter().enumerate() {
+                let slice = array.slice(offset as usize, page_len as usize);
+                let cdc_size = calculate_cdc_size(slice.as_ref(), nullable);
+                if i < page_lengths.len() - 1 {
+                    assert!(
+                        cdc_size >= min_chunk_size as i64,
+                        "Page {i}: CDC size {cdc_size} < min {min_chunk_size}, pages={page_lengths:?}"
+                    );
+                }
+                assert!(
+                    cdc_size <= max_chunk_size as i64,
+                    "Page {i}: CDC size {cdc_size} > max {max_chunk_size}, pages={page_lengths:?}"
+                );
+                offset += page_len;
+            }
+            assert_eq!(
+                offset,
+                array.len() as i64,
+                "page lengths must sum to array length"
+            );
+        }
+    }
+
+    /// Write batches with CDC options and validate roundtrip.
+    /// Matches C++ `WriteTableToBuffer`.
+    fn write_with_cdc_options(
+        batches: &[&RecordBatch],
+        min_chunk_size: usize,
+        max_chunk_size: usize,
+        max_row_group_rows: Option<usize>,
+        enable_dictionary: bool,
+    ) -> Vec<u8> {
+        assert!(!batches.is_empty());
+        let schema = batches[0].schema();
+        let mut builder = WriterProperties::builder()
+            .set_dictionary_enabled(enable_dictionary)
+            .set_content_defined_chunking(Some(CdcOptions {
+                min_chunk_size,
+                max_chunk_size,
+                norm_level: 0,
+            }));
+        if let Some(max_rows) = max_row_group_rows {
+            builder = builder.set_max_row_group_row_count(Some(max_rows));
+        }
+        let props = builder.build();
+        let mut buf = Vec::new();
+        let mut writer = ArrowWriter::try_new(&mut buf, schema.clone(), Some(props)).unwrap();
+        for batch in batches {
+            writer.write(batch).unwrap();
+        }
+        writer.close().unwrap();
+
+        // Roundtrip validation (matching C++ WriteTableToBuffer)
+        let readback = read_batches(&buf);
+        let original_rows: usize = batches.iter().map(|b| b.num_rows()).sum();
+        let readback_rows: usize = readback.iter().map(|b| b.num_rows()).sum();
+        assert_eq!(original_rows, readback_rows, "Roundtrip row count mismatch");
+        if original_rows > 0 {
+            let original = concat_batches(batches.iter().copied());
+            let roundtrip = concat_batches(&readback);
+            assert_eq!(original, roundtrip, "Roundtrip validation failed");
+        }
+
+        buf
+    }
+
+    fn read_batches(data: &[u8]) -> Vec<RecordBatch> {
+        let reader = ParquetRecordBatchReaderBuilder::try_new(bytes::Bytes::from(data.to_vec()))
+            .unwrap()
+            .build()
+            .unwrap();
+        reader.collect::<Result<Vec<_>, _>>().unwrap()
+    }
+
+    fn concat_batches(batches: impl IntoIterator<Item = impl Borrow<RecordBatch>>) -> RecordBatch {
+        let batches: Vec<_> = batches.into_iter().collect();
+        let schema = batches[0].borrow().schema();
+        let batches = batches.iter().map(|b| b.borrow());
+        arrow_select::concat::concat_batches(&schema, batches).unwrap()
+    }
+
+    /// LCS-based diff between two sequences of page lengths (ported from C++).
+    /// Includes the merge-adjacent-diffs post-processing from C++.
+    fn find_differences(first: &[i64], second: &[i64]) -> Vec<(Vec<i64>, Vec<i64>)> {
+        let n = first.len();
+        let m = second.len();
+        let mut dp = vec![vec![0usize; m + 1]; n + 1];
+        for i in 0..n {
+            for j in 0..m {
+                if first[i] == second[j] {
+                    dp[i + 1][j + 1] = dp[i][j] + 1;
+                } else {
+                    dp[i + 1][j + 1] = dp[i + 1][j].max(dp[i][j + 1]);
+                }
+            }
+        }
+        let mut common = Vec::new();
+        let (mut i, mut j) = (n, m);
+        while i > 0 && j > 0 {
+            if first[i - 1] == second[j - 1] {
+                common.push((i - 1, j - 1));
+                i -= 1;
+                j -= 1;
+            } else if dp[i - 1][j] >= dp[i][j - 1] {
+                i -= 1;
+            } else {
+                j -= 1;
+            }
+        }
+        common.reverse();
+
+        let mut result = Vec::new();
+        let (mut last_i, mut last_j) = (0usize, 0usize);
+        for (ci, cj) in &common {
+            if *ci > last_i || *cj > last_j {
+                result.push((first[last_i..*ci].to_vec(), second[last_j..*cj].to_vec()));
+            }
+            last_i = ci + 1;
+            last_j = cj + 1;
+        }
+        if last_i < n || last_j < m {
+            result.push((first[last_i..].to_vec(), second[last_j..].to_vec()));
+        }
+
+        // Merge adjacent diffs (matching C++ post-processing)
+        let mut merged: Vec<(Vec<i64>, Vec<i64>)> = Vec::new();
+        for diff in result {
+            if let Some(prev) = merged.last_mut() {
+                if prev.0.is_empty() && diff.1.is_empty() {
+                    prev.0 = diff.0;
+                    continue;
+                } else if prev.1.is_empty() && diff.0.is_empty() {
+                    prev.1 = diff.1;
+                    continue;
+                }
+            }
+            merged.push(diff);
+        }
+        merged
+    }
+
+    /// Assert exact page length differences between original and modified files.
+    /// Matches C++ `AssertPageLengthDifferences` (full version).
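+    ///
+    /// (Editor's gloss) an "equal" diff moves page boundaries without
+    /// changing the total row count, a "larger" diff gains exactly
+    /// `edit_length` rows (an insert), and a "smaller" diff loses exactly
+    /// `edit_length` rows (a delete).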
+ fn assert_page_length_differences( + original: &ColumnInfo, + modified: &ColumnInfo, + exact_equal_diffs: usize, + exact_larger_diffs: usize, + exact_smaller_diffs: usize, + edit_length: i64, + ) { + let diffs = find_differences(&original.page_lengths, &modified.page_lengths); + let expected = exact_equal_diffs + exact_larger_diffs + exact_smaller_diffs; + + if diffs.len() != expected { + eprintln!("Original: {:?}", original.page_lengths); + eprintln!("Modified: {:?}", modified.page_lengths); + for d in &diffs { + eprintln!(" Diff: {:?} vs {:?}", d.0, d.1); + } + } + assert_eq!( + diffs.len(), + expected, + "Expected {expected} diffs, got {}", + diffs.len() + ); + + let (mut eq, mut larger, mut smaller) = (0usize, 0usize, 0usize); + for (left, right) in &diffs { + let left_sum: i64 = left.iter().sum(); + let right_sum: i64 = right.iter().sum(); + if left_sum == right_sum { + eq += 1; + } else if left_sum < right_sum { + larger += 1; + assert_eq!( + left_sum + edit_length, + right_sum, + "Larger diff mismatch: {left_sum} + {edit_length} != {right_sum}" + ); + } else { + smaller += 1; + assert_eq!( + left_sum, + right_sum + edit_length, + "Smaller diff mismatch: {left_sum} != {right_sum} + {edit_length}" + ); + } + } + + assert_eq!(eq, exact_equal_diffs, "equal diffs count"); + assert_eq!(larger, exact_larger_diffs, "larger diffs count"); + assert_eq!(smaller, exact_smaller_diffs, "smaller diffs count"); + } + + /// Assert page length differences for update cases (simplified version). + /// Matches C++ `AssertPageLengthDifferences` (max_equal_diffs overload). + fn assert_page_length_differences_update( + original: &ColumnInfo, + modified: &ColumnInfo, + max_equal_diffs: usize, + ) { + let diffs = find_differences(&original.page_lengths, &modified.page_lengths); + assert!( + diffs.len() <= max_equal_diffs, + "Expected at most {max_equal_diffs} diffs, got {}", + diffs.len() + ); + for (left, right) in &diffs { + let left_sum: i64 = left.iter().sum(); + let right_sum: i64 = right.iter().sum(); + assert_eq!( + left_sum, right_sum, + "Update diff should not change total row count" + ); + } + } + + // --- FindDifferences tests (ported from C++) --- + + #[test] + fn test_find_differences_basic() { + let diffs = find_differences(&[1, 2, 3, 4, 5], &[1, 7, 8, 4, 5]); + assert_eq!(diffs.len(), 1); + assert_eq!(diffs[0].0, vec![2, 3]); + assert_eq!(diffs[0].1, vec![7, 8]); + } + + #[test] + fn test_find_differences_multiple() { + let diffs = find_differences(&[1, 2, 3, 4, 5, 6, 7], &[1, 8, 9, 4, 10, 6, 11]); + assert_eq!(diffs.len(), 3); + assert_eq!(diffs[0].0, vec![2, 3]); + assert_eq!(diffs[0].1, vec![8, 9]); + assert_eq!(diffs[1].0, vec![5]); + assert_eq!(diffs[1].1, vec![10]); + assert_eq!(diffs[2].0, vec![7]); + assert_eq!(diffs[2].1, vec![11]); + } + + #[test] + fn test_find_differences_different_lengths() { + let diffs = find_differences(&[1, 2, 3], &[1, 2, 3, 4, 5]); + assert_eq!(diffs.len(), 1); + assert!(diffs[0].0.is_empty()); + assert_eq!(diffs[0].1, vec![4, 5]); + } + + #[test] + fn test_find_differences_empty() { + let diffs = find_differences(&[], &[]); + assert!(diffs.is_empty()); + } + + #[test] + fn test_find_differences_changes_at_both_ends() { + let diffs = find_differences(&[1, 2, 3, 4, 5, 6, 7, 8, 9], &[0, 0, 2, 3, 4, 5, 7, 7, 8]); + assert_eq!(diffs.len(), 3); + assert_eq!(diffs[0].0, vec![1]); + assert_eq!(diffs[0].1, vec![0, 0]); + assert_eq!(diffs[1].0, vec![6]); + assert_eq!(diffs[1].1, vec![7]); + assert_eq!(diffs[2].0, vec![9]); + assert!(diffs[2].1.is_empty()); + } + + 
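+    // (Editor's sketch, not part of this change) a diff where the total row
+    // count is unchanged: one LCS anchor (10) and one merged difference.
+    #[test]
+    fn test_find_differences_editor_sketch_equal_total() {
+        let diffs = find_differences(&[10, 20, 30], &[10, 25, 25]);
+        assert_eq!(diffs.len(), 1);
+        assert_eq!(diffs[0].0, vec![20, 30]);
+        assert_eq!(diffs[0].1, vec![25, 25]);
+    }
+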
+    #[test]
+    fn test_find_differences_additional() {
+        let diffs = find_differences(
+            &[445, 312, 393, 401, 410, 138, 558, 457],
+            &[445, 312, 393, 393, 410, 138, 558, 457],
+        );
+        assert_eq!(diffs.len(), 1);
+        assert_eq!(diffs[0].0, vec![401]);
+        assert_eq!(diffs[0].1, vec![393]);
+    }
+
+    // --- Parameterized single-row-group tests via macro ---
+
+    macro_rules! cdc_single_rg_tests {
+        ($mod_name:ident, $dtype:expr, $nullable:expr) => {
+            mod $mod_name {
+                use super::*;
+
+                fn config() -> (DataType, bool, usize, usize) {
+                    let dtype: DataType = $dtype;
+                    let nullable: bool = $nullable;
+                    let bpr = bytes_per_record(&dtype, nullable);
+                    let part_length = CDC_PART_SIZE / bpr;
+                    let edit_length = CDC_EDIT_SIZE / bpr;
+                    (dtype, nullable, part_length, edit_length)
+                }
+
+                fn make_schema(dtype: &DataType, nullable: bool) -> Arc<Schema> {
+                    Arc::new(Schema::new(vec![Field::new("f0", dtype.clone(), nullable)]))
+                }
+
+                #[test]
+                fn delete_once() {
+                    let (dtype, nullable, part_length, edit_length) = config();
+                    let schema = make_schema(&dtype, nullable);
+
+                    let part1 = generate_table(&schema, part_length, 0);
+                    let part2 = generate_table(&schema, edit_length, 1);
+                    let part3 = generate_table(&schema, part_length, part_length as u64);
+
+                    let base = concat_batches([&part1, &part2, &part3]);
+                    let modified = concat_batches([&part1, &part3]);
+
+                    for enable_dictionary in [false, true] {
+                        let base_data = write_with_cdc_options(
+                            &[&base],
+                            CDC_MIN_CHUNK_SIZE,
+                            CDC_MAX_CHUNK_SIZE,
+                            Some(CDC_ROW_GROUP_LENGTH),
+                            enable_dictionary,
+                        );
+                        let mod_data = write_with_cdc_options(
+                            &[&modified],
+                            CDC_MIN_CHUNK_SIZE,
+                            CDC_MAX_CHUNK_SIZE,
+                            Some(CDC_ROW_GROUP_LENGTH),
+                            enable_dictionary,
+                        );
+
+                        let base_info = get_column_info(&base_data, 0);
+                        let mod_info = get_column_info(&mod_data, 0);
+                        assert_eq!(base_info.len(), 1);
+                        assert_eq!(mod_info.len(), 1);
+
+                        assert_cdc_chunk_sizes(
+                            &base.column(0).clone(),
+                            &base_info[0],
+                            nullable,
+                            CDC_MIN_CHUNK_SIZE,
+                            CDC_MAX_CHUNK_SIZE,
+                            enable_dictionary,
+                        );
+                        assert_cdc_chunk_sizes(
+                            &modified.column(0).clone(),
+                            &mod_info[0],
+                            nullable,
+                            CDC_MIN_CHUNK_SIZE,
+                            CDC_MAX_CHUNK_SIZE,
+                            enable_dictionary,
+                        );
+
+                        assert_page_length_differences(
+                            &base_info[0],
+                            &mod_info[0],
+                            0,
+                            0,
+                            1,
+                            edit_length as i64,
+                        );
+                    }
+                }
+
+                #[test]
+                fn delete_twice() {
+                    let (dtype, nullable, part_length, edit_length) = config();
+                    let schema = make_schema(&dtype, nullable);
+
+                    let part1 = generate_table(&schema, part_length, 0);
+                    let part2 = generate_table(&schema, edit_length, 1);
+                    let part3 = generate_table(&schema, part_length, part_length as u64);
+                    let part4 = generate_table(&schema, edit_length, 2);
+                    let part5 = generate_table(&schema, part_length, 2 * part_length as u64);
+
+                    let base = concat_batches([&part1, &part2, &part3, &part4, &part5]);
+                    let modified = concat_batches([&part1, &part3, &part5]);
+
+                    for enable_dictionary in [false, true] {
+                        let base_data = write_with_cdc_options(
+                            &[&base],
+                            CDC_MIN_CHUNK_SIZE,
+                            CDC_MAX_CHUNK_SIZE,
+                            Some(CDC_ROW_GROUP_LENGTH),
+                            enable_dictionary,
+                        );
+                        let mod_data = write_with_cdc_options(
+                            &[&modified],
+                            CDC_MIN_CHUNK_SIZE,
+                            CDC_MAX_CHUNK_SIZE,
+                            Some(CDC_ROW_GROUP_LENGTH),
+                            enable_dictionary,
+                        );
+
+                        let base_info = get_column_info(&base_data, 0);
+                        let mod_info = get_column_info(&mod_data, 0);
+                        assert_eq!(base_info.len(), 1);
+                        assert_eq!(mod_info.len(), 1);
+
+                        assert_cdc_chunk_sizes(
+                            &base.column(0).clone(),
+                            &base_info[0],
+                            nullable,
+                            CDC_MIN_CHUNK_SIZE,
+                            CDC_MAX_CHUNK_SIZE,
+                            enable_dictionary,
+                        );
+                        assert_cdc_chunk_sizes(
+                            &modified.column(0).clone(),
+                            &mod_info[0],
+                            nullable,
+                            CDC_MIN_CHUNK_SIZE,
+                            CDC_MAX_CHUNK_SIZE,
+                            enable_dictionary,
+                        );
+
+                        assert_page_length_differences(
+                            &base_info[0],
+                            &mod_info[0],
+                            0,
+                            0,
+                            2,
+                            edit_length as i64,
+                        );
+                    }
+                }
+
+                #[test]
+                fn insert_once() {
+                    let (dtype, nullable, part_length, edit_length) = config();
+                    let schema = make_schema(&dtype, nullable);
+
+                    let part1 = generate_table(&schema, part_length, 0);
+                    let part2 = generate_table(&schema, edit_length, 1);
+                    let part3 = generate_table(&schema, part_length, part_length as u64);
+
+                    let base = concat_batches([&part1, &part3]);
+                    let modified = concat_batches([&part1, &part2, &part3]);
+
+                    for enable_dictionary in [false, true] {
+                        let base_data = write_with_cdc_options(
+                            &[&base],
+                            CDC_MIN_CHUNK_SIZE,
+                            CDC_MAX_CHUNK_SIZE,
+                            Some(CDC_ROW_GROUP_LENGTH),
+                            enable_dictionary,
+                        );
+                        let mod_data = write_with_cdc_options(
+                            &[&modified],
+                            CDC_MIN_CHUNK_SIZE,
+                            CDC_MAX_CHUNK_SIZE,
+                            Some(CDC_ROW_GROUP_LENGTH),
+                            enable_dictionary,
+                        );
+
+                        let base_info = get_column_info(&base_data, 0);
+                        let mod_info = get_column_info(&mod_data, 0);
+                        assert_eq!(base_info.len(), 1);
+                        assert_eq!(mod_info.len(), 1);
+
+                        assert_cdc_chunk_sizes(
+                            &base.column(0).clone(),
+                            &base_info[0],
+                            nullable,
+                            CDC_MIN_CHUNK_SIZE,
+                            CDC_MAX_CHUNK_SIZE,
+                            enable_dictionary,
+                        );
+                        assert_cdc_chunk_sizes(
+                            &modified.column(0).clone(),
+                            &mod_info[0],
+                            nullable,
+                            CDC_MIN_CHUNK_SIZE,
+                            CDC_MAX_CHUNK_SIZE,
+                            enable_dictionary,
+                        );
+
+                        assert_page_length_differences(
+                            &base_info[0],
+                            &mod_info[0],
+                            0,
+                            1,
+                            0,
+                            edit_length as i64,
+                        );
+                    }
+                }
+
+                #[test]
+                fn insert_twice() {
+                    let (dtype, nullable, part_length, edit_length) = config();
+                    let schema = make_schema(&dtype, nullable);
+
+                    let part1 = generate_table(&schema, part_length, 0);
+                    let part2 = generate_table(&schema, edit_length, 1);
+                    let part3 = generate_table(&schema, part_length, part_length as u64);
+                    let part4 = generate_table(&schema, edit_length, 2);
+                    let part5 = generate_table(&schema, part_length, 2 * part_length as u64);
+
+                    let base = concat_batches([&part1, &part3, &part5]);
+                    let modified = concat_batches([&part1, &part2, &part3, &part4, &part5]);
+
+                    for enable_dictionary in [false, true] {
+                        let base_data = write_with_cdc_options(
+                            &[&base],
+                            CDC_MIN_CHUNK_SIZE,
+                            CDC_MAX_CHUNK_SIZE,
+                            Some(CDC_ROW_GROUP_LENGTH),
+                            enable_dictionary,
+                        );
+                        let mod_data = write_with_cdc_options(
+                            &[&modified],
+                            CDC_MIN_CHUNK_SIZE,
+                            CDC_MAX_CHUNK_SIZE,
+                            Some(CDC_ROW_GROUP_LENGTH),
+                            enable_dictionary,
+                        );
+
+                        let base_info = get_column_info(&base_data, 0);
+                        let mod_info = get_column_info(&mod_data, 0);
+                        assert_eq!(base_info.len(), 1);
+                        assert_eq!(mod_info.len(), 1);
+
+                        assert_cdc_chunk_sizes(
+                            &base.column(0).clone(),
+                            &base_info[0],
+                            nullable,
+                            CDC_MIN_CHUNK_SIZE,
+                            CDC_MAX_CHUNK_SIZE,
+                            enable_dictionary,
+                        );
+                        assert_cdc_chunk_sizes(
+                            &modified.column(0).clone(),
+                            &mod_info[0],
+                            nullable,
+                            CDC_MIN_CHUNK_SIZE,
+                            CDC_MAX_CHUNK_SIZE,
+                            enable_dictionary,
+                        );
+
+                        assert_page_length_differences(
+                            &base_info[0],
+                            &mod_info[0],
+                            0,
+                            2,
+                            0,
+                            edit_length as i64,
+                        );
+                    }
+                }
+
+                #[test]
+                fn update_once() {
+                    let (dtype, nullable, part_length, edit_length) = config();
+                    let schema = make_schema(&dtype, nullable);
+
+                    let part1 = generate_table(&schema, part_length, 0);
+                    let part2 = generate_table(&schema, edit_length, 1);
+                    let part3 = generate_table(&schema, part_length, part_length as u64);
+                    let part4 = generate_table(&schema, edit_length, 2);
+
+                    let base = concat_batches([&part1, &part2, &part3]);
+                    let modified = concat_batches([&part1, &part4, &part3]);
+
+                    for enable_dictionary in [false, true] {
+                        let base_data = write_with_cdc_options(
+                            &[&base],
+                            CDC_MIN_CHUNK_SIZE,
+                            CDC_MAX_CHUNK_SIZE,
+                            Some(CDC_ROW_GROUP_LENGTH),
+                            enable_dictionary,
+                        );
+                        let mod_data = write_with_cdc_options(
+                            &[&modified],
+                            CDC_MIN_CHUNK_SIZE,
+                            CDC_MAX_CHUNK_SIZE,
+                            Some(CDC_ROW_GROUP_LENGTH),
+                            enable_dictionary,
+                        );
+
+                        let base_info = get_column_info(&base_data, 0);
+                        let mod_info = get_column_info(&mod_data, 0);
+                        assert_eq!(base_info.len(), 1);
+                        assert_eq!(mod_info.len(), 1);
+
+                        assert_cdc_chunk_sizes(
+                            &base.column(0).clone(),
+                            &base_info[0],
+                            nullable,
+                            CDC_MIN_CHUNK_SIZE,
+                            CDC_MAX_CHUNK_SIZE,
+                            enable_dictionary,
+                        );
+                        assert_cdc_chunk_sizes(
+                            &modified.column(0).clone(),
+                            &mod_info[0],
+                            nullable,
+                            CDC_MIN_CHUNK_SIZE,
+                            CDC_MAX_CHUNK_SIZE,
+                            enable_dictionary,
+                        );
+
+                        assert_page_length_differences_update(&base_info[0], &mod_info[0], 1);
+                    }
+                }
+
+                #[test]
+                fn update_twice() {
+                    let (dtype, nullable, part_length, edit_length) = config();
+                    let schema = make_schema(&dtype, nullable);
+
+                    let part1 = generate_table(&schema, part_length, 0);
+                    let part2 = generate_table(&schema, edit_length, 1);
+                    let part3 = generate_table(&schema, part_length, part_length as u64);
+                    let part4 = generate_table(&schema, edit_length, 2);
+                    let part5 = generate_table(&schema, part_length, 2 * part_length as u64);
+                    let part6 = generate_table(&schema, edit_length, 3);
+                    let part7 = generate_table(&schema, edit_length, 4);
+
+                    let base = concat_batches([&part1, &part2, &part3, &part4, &part5]);
+                    let modified = concat_batches([&part1, &part6, &part3, &part7, &part5]);
+
+                    for enable_dictionary in [false, true] {
+                        let base_data = write_with_cdc_options(
+                            &[&base],
+                            CDC_MIN_CHUNK_SIZE,
+                            CDC_MAX_CHUNK_SIZE,
+                            Some(CDC_ROW_GROUP_LENGTH),
+                            enable_dictionary,
+                        );
+                        let mod_data = write_with_cdc_options(
+                            &[&modified],
+                            CDC_MIN_CHUNK_SIZE,
+                            CDC_MAX_CHUNK_SIZE,
+                            Some(CDC_ROW_GROUP_LENGTH),
+                            enable_dictionary,
+                        );
+
+                        let base_info = get_column_info(&base_data, 0);
+                        let mod_info = get_column_info(&mod_data, 0);
+                        assert_eq!(base_info.len(), 1);
+                        assert_eq!(mod_info.len(), 1);
+
+                        assert_cdc_chunk_sizes(
+                            &base.column(0).clone(),
+                            &base_info[0],
+                            nullable,
+                            CDC_MIN_CHUNK_SIZE,
+                            CDC_MAX_CHUNK_SIZE,
+                            enable_dictionary,
+                        );
+                        assert_cdc_chunk_sizes(
+                            &modified.column(0).clone(),
+                            &mod_info[0],
+                            nullable,
+                            CDC_MIN_CHUNK_SIZE,
+                            CDC_MAX_CHUNK_SIZE,
+                            enable_dictionary,
+                        );
+
+                        assert_page_length_differences_update(&base_info[0], &mod_info[0], 2);
+                    }
+                }
+
+                #[test]
+                fn prepend() {
+                    let (dtype, nullable, part_length, edit_length) = config();
+                    let schema = make_schema(&dtype, nullable);
+
+                    let part1 = generate_table(&schema, part_length, 0);
+                    let part2 = generate_table(&schema, edit_length, 1);
+                    let part3 = generate_table(&schema, part_length, part_length as u64);
+                    let part4 = generate_table(&schema, edit_length, 2);
+
+                    let base = concat_batches([&part1, &part2, &part3]);
+                    let modified = concat_batches([&part4, &part1, &part2, &part3]);
+
+                    for enable_dictionary in [false, true] {
+                        let base_data = write_with_cdc_options(
+                            &[&base],
+                            CDC_MIN_CHUNK_SIZE,
+                            CDC_MAX_CHUNK_SIZE,
+                            Some(CDC_ROW_GROUP_LENGTH),
+                            enable_dictionary,
+                        );
+                        let mod_data = write_with_cdc_options(
+                            &[&modified],
+                            CDC_MIN_CHUNK_SIZE,
+                            CDC_MAX_CHUNK_SIZE,
+                            Some(CDC_ROW_GROUP_LENGTH),
+                            enable_dictionary,
+                        );
+
+                        let base_info = get_column_info(&base_data, 0);
+                        let mod_info = get_column_info(&mod_data, 0);
+                        assert_eq!(base_info.len(), 1);
+                        assert_eq!(mod_info.len(), 1);
+
+                        assert_cdc_chunk_sizes(
+                            &base.column(0).clone(),
+                            &base_info[0],
+                            nullable,
+                            CDC_MIN_CHUNK_SIZE,
+                            CDC_MAX_CHUNK_SIZE,
+                            enable_dictionary,
+                        );
+                        assert_cdc_chunk_sizes(
+                            &modified.column(0).clone(),
+                            &mod_info[0],
+                            nullable,
+                            CDC_MIN_CHUNK_SIZE,
+                            CDC_MAX_CHUNK_SIZE,
+                            enable_dictionary,
+                        );
+
+                        assert!(
+                            mod_info[0].page_lengths.len() >= base_info[0].page_lengths.len(),
+                            "Modified should have same or more pages"
+                        );
+
+                        assert_page_length_differences(
+                            &base_info[0],
+                            &mod_info[0],
+                            0,
+                            1,
+                            0,
+                            edit_length as i64,
+                        );
+                    }
+                }
+
+                #[test]
+                fn append() {
+                    let (dtype, nullable, part_length, edit_length) = config();
+                    let schema = make_schema(&dtype, nullable);
+
+                    let part1 = generate_table(&schema, part_length, 0);
+                    let part2 = generate_table(&schema, edit_length, 1);
+                    let part3 = generate_table(&schema, part_length, part_length as u64);
+                    let part4 = generate_table(&schema, edit_length, 2);
+
+                    let base = concat_batches([&part1, &part2, &part3]);
+                    let modified = concat_batches([&part1, &part2, &part3, &part4]);
+
+                    for enable_dictionary in [false, true] {
+                        let base_data = write_with_cdc_options(
+                            &[&base],
+                            CDC_MIN_CHUNK_SIZE,
+                            CDC_MAX_CHUNK_SIZE,
+                            Some(CDC_ROW_GROUP_LENGTH),
+                            enable_dictionary,
+                        );
+                        let mod_data = write_with_cdc_options(
+                            &[&modified],
+                            CDC_MIN_CHUNK_SIZE,
+                            CDC_MAX_CHUNK_SIZE,
+                            Some(CDC_ROW_GROUP_LENGTH),
+                            enable_dictionary,
+                        );
+
+                        let base_info = get_column_info(&base_data, 0);
+                        let mod_info = get_column_info(&mod_data, 0);
+                        assert_eq!(base_info.len(), 1);
+                        assert_eq!(mod_info.len(), 1);
+
+                        assert_cdc_chunk_sizes(
+                            &base.column(0).clone(),
+                            &base_info[0],
+                            nullable,
+                            CDC_MIN_CHUNK_SIZE,
+                            CDC_MAX_CHUNK_SIZE,
+                            enable_dictionary,
+                        );
+                        assert_cdc_chunk_sizes(
+                            &modified.column(0).clone(),
+                            &mod_info[0],
+                            nullable,
+                            CDC_MIN_CHUNK_SIZE,
+                            CDC_MAX_CHUNK_SIZE,
+                            enable_dictionary,
+                        );
+
+                        let bp = &base_info[0].page_lengths;
+                        let mp = &mod_info[0].page_lengths;
+                        assert!(mp.len() >= bp.len());
+                        for i in 0..bp.len() - 1 {
+                            assert_eq!(bp[i], mp[i], "Page {i} should be identical");
+                        }
+                        assert!(mp[bp.len() - 1] >= bp[bp.len() - 1]);
+                    }
+                }
+
+                #[test]
+                fn empty_table() {
+                    let (dtype, nullable, _, _) = config();
+                    let schema = make_schema(&dtype, nullable);
+
+                    let empty = RecordBatch::new_empty(schema);
+                    for enable_dictionary in [false, true] {
+                        let data = write_with_cdc_options(
+                            &[&empty],
+                            CDC_MIN_CHUNK_SIZE,
+                            CDC_MAX_CHUNK_SIZE,
+                            Some(CDC_ROW_GROUP_LENGTH),
+                            enable_dictionary,
+                        );
+                        let info = get_column_info(&data, 0);
+                        // Empty table: either no row groups or one with no data pages
+                        if !info.is_empty() {
+                            assert!(info[0].page_lengths.is_empty());
+                        }
+                    }
+                }
+
+                #[test]
+                fn array_offsets() {
+                    let (dtype, nullable, part_length, edit_length) = config();
+                    let schema = make_schema(&dtype, nullable);
+
+                    let table = concat_batches([
+                        &generate_table(&schema, part_length, 0),
+                        &generate_table(&schema, edit_length, 1),
+                        &generate_table(&schema, part_length, part_length as u64),
+                    ]);
+
+                    for offset in [0usize, 512, 1024] {
+                        if offset >= table.num_rows() {
+                            continue;
+                        }
+                        let sliced = table.slice(offset, table.num_rows() - offset);
+                        let data = write_with_cdc_options(
+                            &[&sliced],
+                            CDC_MIN_CHUNK_SIZE,
+                            CDC_MAX_CHUNK_SIZE,
+                            Some(CDC_ROW_GROUP_LENGTH),
+                            true,
+                        );
+                        let info = get_column_info(&data, 0);
+                        assert_eq!(info.len(), 1);
+
+                        // Verify CDC actually produced content-defined chunks
+                        assert_cdc_chunk_sizes(
+                            &sliced.column(0).clone(),
+                            &info[0],
+                            nullable,
+                            CDC_MIN_CHUNK_SIZE,
+                            CDC_MAX_CHUNK_SIZE,
+                            true,
+                        );
+                    }
+                }
+            }
+        };
+    }
+
+    // Instantiate for representative types matching C++ categories
+    cdc_single_rg_tests!(cdc_bool_non_null, DataType::Boolean, false);
+    cdc_single_rg_tests!(cdc_i32_non_null, DataType::Int32, false);
+    cdc_single_rg_tests!(cdc_i64_nullable, DataType::Int64, true);
+    cdc_single_rg_tests!(cdc_f64_nullable, DataType::Float64, true);
+    cdc_single_rg_tests!(cdc_utf8_non_null, DataType::Utf8, false);
+    cdc_single_rg_tests!(cdc_binary_nullable, DataType::Binary, true);
+    cdc_single_rg_tests!(cdc_fsb16_nullable, DataType::FixedSizeBinary(16), true);
+    cdc_single_rg_tests!(cdc_date32_non_null, DataType::Date32, false);
+    cdc_single_rg_tests!(
+        cdc_timestamp_nullable,
+        DataType::Timestamp(arrow_schema::TimeUnit::Nanosecond, None),
+        true
+    );
+
+    // --- Multiple row group tests matching C++ TestCDCMultipleRowGroups ---
+
+    mod cdc_multiple_row_groups {
+        use super::*;
+
+        const PART_LENGTH: usize = 128 * 1024;
+        const EDIT_LENGTH: usize = 128;
+        const ROW_GROUP_LENGTH: usize = 64 * 1024;
+
+        fn schema() -> Arc<Schema> {
+            Arc::new(Schema::new(vec![
+                Field::new("int32", DataType::Int32, true),
+                Field::new("float64", DataType::Float64, true),
+                Field::new("bool", DataType::Boolean, false),
+            ]))
+        }
+
+        #[test]
+        fn insert_once() {
+            let s = schema();
+            let part1 = generate_table(&s, PART_LENGTH, 0);
+            let part2 = generate_table(&s, PART_LENGTH, 2);
+            let part3 = generate_table(&s, PART_LENGTH, 4);
+            let edit1 = generate_table(&s, EDIT_LENGTH, 1);
+            let edit2 = generate_table(&s, EDIT_LENGTH, 3);
+
+            let base = concat_batches([&part1, &edit1, &part2, &part3]);
+            let modified = concat_batches([&part1, &edit1, &edit2, &part2, &part3]);
+            assert_eq!(modified.num_rows(), base.num_rows() + EDIT_LENGTH);
+
+            let base_data = write_with_cdc_options(
+                &[&base],
+                CDC_MIN_CHUNK_SIZE,
+                CDC_MAX_CHUNK_SIZE,
+                Some(ROW_GROUP_LENGTH),
+                false,
+            );
+            let mod_data = write_with_cdc_options(
+                &[&modified],
+                CDC_MIN_CHUNK_SIZE,
+                CDC_MAX_CHUNK_SIZE,
+                Some(ROW_GROUP_LENGTH),
+                false,
+            );
+
+            for col in 0..s.fields().len() {
+                let base_info = get_column_info(&base_data, col);
+                let mod_info = get_column_info(&mod_data, col);
+
+                assert_eq!(base_info.len(), 7, "expected 7 row groups for col {col}");
+                assert_eq!(mod_info.len(), 7);
+
+                // First two row groups should be identical
+                assert_eq!(base_info[0].page_lengths, mod_info[0].page_lengths);
+                assert_eq!(base_info[1].page_lengths, mod_info[1].page_lengths);
+
+                // Middle row groups: 1 larger + 1 smaller diff
+                for i in 2..mod_info.len() - 1 {
+                    assert_page_length_differences(
+                        &base_info[i],
+                        &mod_info[i],
+                        0,
+                        1,
+                        1,
+                        EDIT_LENGTH as i64,
+                    );
+                }
+                // Last row group: just larger
+                assert_page_length_differences(
+                    base_info.last().unwrap(),
+                    mod_info.last().unwrap(),
+                    0,
+                    1,
+                    0,
+                    EDIT_LENGTH as i64,
+                );
+            }
+        }
+
+        #[test]
+        fn delete_once() {
+            let s = schema();
+            let part1 = generate_table(&s, PART_LENGTH, 0);
+            let part2 = generate_table(&s, PART_LENGTH, 2);
+            let part3 = generate_table(&s, PART_LENGTH, 4);
+            let edit1 = generate_table(&s, EDIT_LENGTH, 1);
+            let edit2 = generate_table(&s, EDIT_LENGTH, 3);
+
+            let base = concat_batches([&part1, &edit1, &part2, &part3, &edit2]);
+            let modified = concat_batches([&part1, &part2, &part3, &edit2]);
+
+            let base_data = write_with_cdc_options(
+                &[&base],
+                CDC_MIN_CHUNK_SIZE,
+                CDC_MAX_CHUNK_SIZE,
+                Some(ROW_GROUP_LENGTH),
+                false,
+            );
+            let mod_data = write_with_cdc_options(
+                &[&modified],
+                CDC_MIN_CHUNK_SIZE,
+                CDC_MAX_CHUNK_SIZE,
+                Some(ROW_GROUP_LENGTH),
+                false,
+            );
+
+            for col in 0..s.fields().len() {
+                let base_info = get_column_info(&base_data, col);
+                let mod_info = get_column_info(&mod_data, col);
+
+                assert_eq!(base_info.len(), 7);
+                assert_eq!(mod_info.len(), 7);
+
+                assert_eq!(base_info[0].page_lengths, mod_info[0].page_lengths);
+                assert_eq!(base_info[1].page_lengths, mod_info[1].page_lengths);
+
+                for i in 2..mod_info.len() - 1 {
+                    assert_page_length_differences(
+                        &base_info[i],
+                        &mod_info[i],
+                        0,
+                        1,
+                        1,
+                        EDIT_LENGTH as i64,
+                    );
+                }
+                assert_page_length_differences(
+                    base_info.last().unwrap(),
+                    mod_info.last().unwrap(),
+                    0,
+                    0,
+                    1,
+                    EDIT_LENGTH as i64,
+                );
+            }
+        }
+
+        #[test]
+        fn update_once() {
+            let s = schema();
+            let part1 = generate_table(&s, PART_LENGTH, 0);
+            let part2 = generate_table(&s, PART_LENGTH, 2);
+            let part3 = generate_table(&s, PART_LENGTH, 4);
+            let edit1 = generate_table(&s, EDIT_LENGTH, 1);
+            let edit2 = generate_table(&s, EDIT_LENGTH, 3);
+            let edit3 = generate_table(&s, EDIT_LENGTH, 5);
+
+            let base = concat_batches([&part1, &edit1, &part2, &part3, &edit2]);
+            let modified = concat_batches([&part1, &edit3, &part2, &part3, &edit2]);
+
+            let base_data = write_with_cdc_options(
+                &[&base],
+                CDC_MIN_CHUNK_SIZE,
+                CDC_MAX_CHUNK_SIZE,
+                Some(ROW_GROUP_LENGTH),
+                false,
+            );
+            let mod_data = write_with_cdc_options(
+                &[&modified],
+                CDC_MIN_CHUNK_SIZE,
+                CDC_MAX_CHUNK_SIZE,
+                Some(ROW_GROUP_LENGTH),
+                false,
+            );
+
+            for col in 0..s.fields().len() {
+                let nullable = s.field(col).is_nullable();
+                let base_info = get_column_info(&base_data, col);
+                let mod_info = get_column_info(&mod_data, col);
+
+                assert_eq!(base_info.len(), 7);
+                assert_eq!(mod_info.len(), 7);
+
+                // Validate CDC chunk sizes on at least the first row group
+                assert_cdc_chunk_sizes(
+                    &base.column(col).slice(0, ROW_GROUP_LENGTH),
+                    &base_info[0],
+                    nullable,
+                    CDC_MIN_CHUNK_SIZE,
+                    CDC_MAX_CHUNK_SIZE,
+                    false,
+                );
+
+                assert_eq!(base_info[0].page_lengths, mod_info[0].page_lengths);
+                assert_eq!(base_info[1].page_lengths, mod_info[1].page_lengths);
+
+                // Row group containing the edit
+                assert_page_length_differences_update(&base_info[2], &mod_info[2], 1);
+
+                // Remaining row groups should be identical
+                for i in 3..mod_info.len() {
+                    assert_eq!(base_info[i].page_lengths, mod_info[i].page_lengths);
+                }
+            }
+        }
+
+        #[test]
+        fn append() {
+            let s = schema();
+            let part1 = generate_table(&s, PART_LENGTH, 0);
+            let part2 = generate_table(&s, PART_LENGTH, 2);
+            let part3 = generate_table(&s, PART_LENGTH, 4);
+            let edit1 = generate_table(&s, EDIT_LENGTH, 1);
+            let edit2 = generate_table(&s, EDIT_LENGTH, 3);
+
+            let base = concat_batches([&part1, &edit1, &part2, &part3]);
+            let modified = concat_batches([&part1, &edit1, &part2, &part3, &edit2]);
+
+            let base_data = write_with_cdc_options(
+                &[&base],
+                CDC_MIN_CHUNK_SIZE,
+                CDC_MAX_CHUNK_SIZE,
+                Some(ROW_GROUP_LENGTH),
+                false,
+            );
+            let mod_data = write_with_cdc_options(
+                &[&modified],
+                CDC_MIN_CHUNK_SIZE,
+                CDC_MAX_CHUNK_SIZE,
+                Some(ROW_GROUP_LENGTH),
+                false,
+            );
+
+            for col in 0..s.fields().len() {
+                let nullable = s.field(col).is_nullable();
+                let base_info = get_column_info(&base_data, col);
+                let mod_info = get_column_info(&mod_data, col);
+
+                assert_eq!(base_info.len(), 7);
+                assert_eq!(mod_info.len(), 7);
+
+                // Validate CDC chunk sizes on the first row group
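+                // (the min/max chunk-size bounds are only checked against the
+                // first ROW_GROUP_LENGTH rows here; the page-length comparisons
+                // below cover all row groups)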
+                assert_cdc_chunk_sizes(
+                    &base.column(col).slice(0, ROW_GROUP_LENGTH),
+                    &base_info[0],
+                    nullable,
+                    CDC_MIN_CHUNK_SIZE,
+                    CDC_MAX_CHUNK_SIZE,
+                    false,
+                );
+
+                // All row groups except last should be identical
+                for i in 0..base_info.len() - 1 {
+                    assert_eq!(base_info[i].page_lengths, mod_info[i].page_lengths);
+                }
+
+                // Last row group: pages should be identical except last
+                let bp = &base_info.last().unwrap().page_lengths;
+                let mp = &mod_info.last().unwrap().page_lengths;
+                assert!(mp.len() >= bp.len());
+                for i in 0..bp.len() - 1 {
+                    assert_eq!(bp[i], mp[i]);
+                }
+            }
+        }
+    }
+
+    // --- Direct chunker test (kept from original) ---
+
+    #[test]
+    fn test_cdc_array_offsets_direct() {
+        use crate::basic::Type as PhysicalType;
+        use crate::schema::types::{ColumnDescriptor, ColumnPath, Type};
+
+        let options = CdcOptions {
+            min_chunk_size: CDC_MIN_CHUNK_SIZE,
+            max_chunk_size: CDC_MAX_CHUNK_SIZE,
+            norm_level: 0,
+        };
+        let desc = {
+            let tp = Type::primitive_type_builder("col", PhysicalType::INT32)
+                .build()
+                .unwrap();
+            ColumnDescriptor::new(Arc::new(tp), 0, 0, ColumnPath::new(vec![]))
+        };
+
+        let bpr = bytes_per_record(&DataType::Int32, false);
+        let n = CDC_PART_SIZE / bpr;
+        let offset = 10usize;
+
+        let array: Int32Array = (0..n).map(|i| test_hash(0, i as u64) as i32).collect();
+        let mut chunker = super::ContentDefinedChunker::new(&desc, &options).unwrap();
+        let chunks = chunker.get_arrow_chunks(None, None, &array).unwrap();
+
+        let sliced = array.slice(offset, n - offset);
+        let mut chunker2 = super::ContentDefinedChunker::new(&desc, &options).unwrap();
+        let chunks2 = chunker2.get_arrow_chunks(None, None, &sliced).unwrap();
+
+        let values: Vec<usize> = chunks.iter().map(|c| c.num_values).collect();
+        let values2: Vec<usize> = chunks2.iter().map(|c| c.num_values).collect();
+
+        assert!(values.len() > 1, "expected multiple chunks, got {values:?}");
+        assert_eq!(values.len(), values2.len(), "chunk count must match");
+
+        assert_eq!(
+            values[0] - values2[0],
+            offset,
+            "offsetted first chunk should be {offset} values shorter"
+        );
+        assert_eq!(
+            &values[1..],
+            &values2[1..],
+            "all chunks after the first must be identical"
+        );
+    }
+
+    /// Helper to write a batch with CDC and read it back.
+    fn cdc_roundtrip(batch: &RecordBatch) -> RecordBatch {
+        let props = WriterProperties::builder()
+            .set_content_defined_chunking(Some(CdcOptions::default()))
+            .build();
+        let mut buffer = Vec::new();
+        let mut writer = ArrowWriter::try_new(&mut buffer, batch.schema(), Some(props)).unwrap();
+        writer.write(batch).unwrap();
+        writer.close().unwrap();
+
+        let reader = ParquetRecordBatchReaderBuilder::try_new(bytes::Bytes::from(buffer))
+            .unwrap()
+            .build()
+            .unwrap();
+        reader.into_iter().next().unwrap().unwrap()
+    }
+
+    /// Regression test for
+    ///
+    /// Writing nested list data with CDC enabled panicked with an out-of-bounds
+    /// slice access when null list entries had non-zero child ranges.
+    #[test]
+    fn test_cdc_list_roundtrip() {
+        let schema = Arc::new(Schema::new(vec![
+            Field::new(
+                "_1",
+                DataType::List(Arc::new(Field::new_list_field(DataType::Int32, true))),
+                true,
+            ),
+            Field::new(
+                "_2",
+                DataType::List(Arc::new(Field::new_list_field(DataType::Boolean, true))),
+                true,
+            ),
+            Field::new(
+                "_3",
+                DataType::LargeList(Arc::new(Field::new_list_field(DataType::Utf8, true))),
+                true,
+            ),
+        ]));
+        let batch = create_random_batch(schema, 2, 0.25, 0.75).unwrap();
+        assert_eq!(cdc_roundtrip(&batch), batch);
+    }
+
+    /// Test CDC with deeply nested types: List<List<Int32>>, List<Struct<List<Int32>>>
+    #[test]
+    fn test_cdc_deeply_nested_roundtrip() {
+        let inner_field = Field::new_list_field(DataType::Int32, true);
+        let inner_type = DataType::List(Arc::new(inner_field));
+        let outer_field = Field::new_list_field(inner_type.clone(), true);
+        let list_list_type = DataType::List(Arc::new(outer_field));
+
+        let struct_inner_field = Field::new_list_field(DataType::Int32, true);
+        let struct_inner_type = DataType::List(Arc::new(struct_inner_field));
+        let struct_fields = Fields::from(vec![Field::new("a", struct_inner_type, true)]);
+        let struct_type = DataType::Struct(struct_fields);
+        let struct_list_field = Field::new_list_field(struct_type, true);
+        let list_struct_type = DataType::List(Arc::new(struct_list_field));
+
+        let schema = Arc::new(Schema::new(vec![
+            Field::new("list_list", list_list_type, true),
+            Field::new("list_struct_list", list_struct_type, true),
+        ]));
+        let batch = create_random_batch(schema, 200, 0.25, 0.75).unwrap();
+        assert_eq!(cdc_roundtrip(&batch), batch);
+    }
+
+    /// Test CDC with list arrays that have non-empty null segments.
+    ///
+    /// Per the Arrow columnar format spec: "a null value may correspond to a
+    /// non-empty segment in the child array". This test constructs such arrays
+    /// manually and verifies the CDC writer handles them correctly.
+    #[test]
+    fn test_cdc_list_non_empty_null_segments() {
+        // Build List<Int32> where null entries own non-zero child ranges:
+        // row 0: [1, 2]   offsets[0..2]   valid
+        // row 1: null     offsets[2..5]   null, but owns 3 child values
+        // row 2: [6, 7]   offsets[5..7]   valid
+        // row 3: null     offsets[7..9]   null, but owns 2 child values
+        // row 4: [10]     offsets[9..10]  valid
+        let values = Int32Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
+        let offsets = Buffer::from_iter([0_i32, 2, 5, 7, 9, 10]);
+        let null_bitmap = Buffer::from([0b00010101]); // rows 0, 2, 4 valid
+
+        let list_type = DataType::List(Arc::new(Field::new_list_field(DataType::Int32, false)));
+        let list_data = unsafe {
+            ArrayData::new_unchecked(
+                list_type.clone(),
+                5,
+                None,
+                Some(null_bitmap),
+                0,
+                vec![offsets],
+                vec![values.to_data()],
+            )
+        };
+        let list_array = arrow_array::make_array(list_data);
+
+        let schema = Arc::new(Schema::new(vec![Field::new("col", list_type, true)]));
+        let batch = RecordBatch::try_new(schema, vec![list_array]).unwrap();
+
+        let read = cdc_roundtrip(&batch);
+        let read_list = read.column(0).as_list::<i32>();
+        assert_eq!(read_list.len(), 5);
+        assert!(read_list.is_valid(0));
+        assert!(read_list.is_null(1));
+        assert!(read_list.is_valid(2));
+        assert!(read_list.is_null(3));
+        assert!(read_list.is_valid(4));
+
+        let get_vals = |i: usize| -> Vec<i32> {
+            read_list
+                .value(i)
+                .as_primitive::<Int32Type>()
+                .values()
+                .iter()
+                .copied()
+                .collect()
+        };
+        assert_eq!(get_vals(0), vec![1, 2]);
+        assert_eq!(get_vals(2), vec![6, 7]);
+        assert_eq!(get_vals(4), vec![10]);
+    }
+}
diff --git a/parquet/src/column/chunker/cdc_codegen.py b/parquet/src/column/chunker/cdc_codegen.py
new file mode 100644
index 000000000000..3675c92d0281
--- /dev/null
+++ b/parquet/src/column/chunker/cdc_codegen.py
@@ -0,0 +1,118 @@
+#!/usr/bin/env python
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+Produce the given number of gearhash tables for rolling hash calculations.
+
+Each table consists of 256 64-bit integer values and by default 8 tables are
+produced. The tables are written to a Rust source file.
+
+The generated numbers are deterministic "random" numbers created by MD5 hashing
+the table seed and the entry index. This ensures that the tables are the same
+across different runs and platforms. The exact generating function matters
+little as long as the values are sufficiently uniformly distributed.
+
+Reference implementations:
+- https://github.com/Borelset/destor/blob/master/src/chunking/fascdc_chunking.c
+- https://github.com/nlfiedler/fastcdc-rs/blob/master/examples/table64.rs
+
+Usage:
+    python cdc_codegen.py [ntables]
+
+    ntables: Number of gearhash tables to generate (default 8).
+
+The generated source file is written to ./cdc_generated.rs
+"""
+
+import hashlib
+import pathlib
+import sys
+from io import StringIO
+
+
+template = """\
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// This table should be identical with
+// https://github.com/apache/arrow/blob/main/cpp/src/parquet/chunker_internal_generated.h
+// Ensure that both tables remain in sync after any changes.
+
+#[rustfmt::skip]
+pub(crate) const NUM_GEARHASH_TABLES: usize = {ntables};
+
+#[rustfmt::skip]
+pub(crate) const GEARHASH_TABLE: [[u64; 256]; NUM_GEARHASH_TABLES] = [
+{content}];
+"""
+
+
+def generate_hash(n: int, seed: int):
+    """Produce predictable hash values for a given seed and n using MD5.
+
+    The value can be arbitrary as long as it is deterministic and has a uniform
+    distribution. The MD5 hash is used to produce a 16 character hexadecimal
+    string which is then rendered as a 64-bit integer literal.
+    """
+    value = bytes([seed] * 64 + [n] * 64)
+    hasher = hashlib.md5(value)
+    return hasher.hexdigest()[:16]
+
+
+def generate_hashtable(seed: int, length=256):
+    """Generate and render a single gearhash table."""
+    table = [generate_hash(n, seed=seed) for n in range(length)]
+
+    out = StringIO()
+    out.write(f"    // seed = {seed}\n")
+    out.write("    [\n")
+    for i in range(0, length, 4):
+        values = [f"0x{value}" for value in table[i : i + 4]]
+        out.write(f"        {', '.join(values)},\n")
+    out.write("    ]")
+
+    return out.getvalue()
+
+
+def generate_source(ntables=8, relative_path="cdc_generated.rs"):
+    """Generate a Rust source file with multiple gearhash tables."""
+    path = pathlib.Path(__file__).parent / relative_path
+    tables = [generate_hashtable(seed) for seed in range(ntables)]
+    content = ",\n".join(tables)
+    text = template.format(ntables=ntables, content=content)
+    path.write_text(text)
+
+
+if __name__ == "__main__":
+    ntables = int(sys.argv[1]) if len(sys.argv) > 1 else 8
+    generate_source(ntables)
diff --git a/parquet/src/column/chunker/cdc_generated.rs b/parquet/src/column/chunker/cdc_generated.rs
new file mode 100644
index 000000000000..4222e3669245
--- /dev/null
+++ b/parquet/src/column/chunker/cdc_generated.rs
@@ -0,0 +1,558 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#[rustfmt::skip] +pub(crate) const NUM_GEARHASH_TABLES: usize = 8; + +#[rustfmt::skip] +pub(crate) const GEARHASH_TABLE: [[u64; 256]; NUM_GEARHASH_TABLES] = [ + // seed = 0 + [ + 0xf09f35a563783945, 0x0dcc5b3bc5ae410a, 0x63f1ea8d22554270, 0xfbe5ee7bd05a7b61, + 0x3f692ed5e9934aba, 0xaab3755952250eb8, 0xdefb168dc2888fa5, 0x501b36f7c77a7d47, + 0xd2fff45d1989642d, 0x80217c1c600e30a6, 0xb9469ee2e43df7ac, 0x3654b76a61999706, + 0x6ea73dfe5de0c6b6, 0xdfd662e1937a589d, 0x0dbe0cc74b188a68, 0xde45f4e6d73ffc6f, + 0xcdf7a7759e70d87e, 0x5d6a951b8d38c310, 0xdc9423c3813fcf2c, 0x25dc2976e167ffce, + 0xc2555baa1d031c84, 0x115bc3f2230a3ab6, 0xd4b10260f350bede, 0xdfd3501ab447d723, + 0x022e79217edaf167, 0x1635e2255c5a7526, 0xa0a750350cc77102, 0xc027133e05d39f56, + 0xd949459779cf0387, 0xb92f1464f5c688c2, 0xd9ac5f3e8b42f2f3, 0xdf02bb6f5ecaac21, + 0x8156f988fac7bfa4, 0xe4580f97bede2ec8, 0x44fe7d17a76fca32, 0x885f59bd54c2014c, + 0x435e63ec655ffae9, 0x5ebc51930967b1f1, 0x5428c2084ac29e47, 0x9465938fec30e36b, + 0xc7cb3de4977772cd, 0x15692d7c201e8c3a, 0x505ee65cdc4b17f4, 0x7d9839a0a7aead6b, + 0xeef5f5b6a0105291, 0x76c2fb232ce7f5bf, 0x5c13893c1c3ff3a9, 0x65b6b547d4442f98, + 0xb8ad7487c8c96fce, 0x906bcf51c99974f8, 0x2f56e48bb943a48c, 0xbc9ab109f82d3a44, + 0xcd5160cdc8c7e735, 0xbe9acb9df3427732, 0x386b91d477d7fade, 0x36be463621dd5af2, + 0xcbe6a2faffd627a8, 0x9c8fd528463a2f5a, 0xb9b88c6bb802b184, 0xb414b4e665c597c7, + 0xbedb142568209556, 0x5360d81c25429dce, 0x63a69a960a952f37, 0xc900d63899e1b503, + 0x1abc63a8b37c7728, 0xa8b3a8b6409080eb, 0x495e391f662959f6, 0xdf1e136f3e12229b, + 0x33d5fc526b0dd38d, 0x321221ae2abfac63, 0x7fde18351fda7395, 0xed79fe5c3a6aa4c3, + 0x2dd6965a4867d8d4, 0x54813ca20fe8799b, 0x5d59ea6456465c39, 0x0de0c294d1936b81, + 0x4aaf0755002c588c, 0x3530a1857ad04c6d, 0xb8a64f4ce184442b, 0xe0def10bceedfa17, + 0x46e38d0a443757ec, 0x9795a1c645ee16d7, 0x7e531def245eac8a, 0x683b25c43a0716cf, + 0x884583d372da219d, 0x5b06b62c910416e5, 0x54b6902fbebd3dbe, 0x931198d40a761a75, + 0xead7d8e830013590, 0x80b4d5dc99bfaced, 0xf98272c8108a1ad2, 0x1adce054289a0ec6, + 0x7d53a1143c56b465, 0x497fbe4f00c92b52, 0x525e4cc2e81ebd69, 0xc94478e0d5508ff6, + 0xb8a5da83c196d07c, 0x7667a921b65b0603, 0xf236fabbdefe6cd1, 0x53da978d19a92b98, + 0xc604f6e97087124d, 0x2cbd27221924b094, 0x65cd1102c985b1d2, 0x08c0755dc1a97eb4, + 0x5e0419e921c0fef1, 0x282d2c1196f84a29, 0xe21117fcfc5793f7, 0xcf4e985dc38e6c2e, + 0xd521f4f264d55616, 0xde69b04c485f2a10, 0x59410e245305178a, 0xceab1d477c943601, + 0xa9805732d71ee5e9, 0x054cd443896974f6, 0xf2b517717a423a3e, 0x09517937fa9fac95, + 0x4938233e9ca871e3, 0x9132cbaf56f83ec0, 0x4703421ed1dd027d, 0xfd9933f4e6f1ec4e, + 0xf237c7fded2274a8, 0xdf4616efe68cd7b4, 0x5e46de0f39f0a380, 0x3d41e0c6d8e095b0, + 0xc5272f8a5bb2df09, 0x68aa78e8301fb964, 0xbf5b5b52c8e32ae0, 0xbf28ed3df74bdcf7, + 0xd6198f64c833815a, 0x8cd99d2974267544, 0xd90560ea4465ff2c, 0x571d65ad7ad59261, + 0x309453518baa367a, 0xa60538377bc79fb2, 0xace515da1ab4183c, 0xf56d3c8d891d1c5b, + 0x5b0d8370b59def49, 0x775866ce7c83c762, 0x3d76085695c8e18a, 0xba064d1a9af1b114, + 0xc84ef7cd7b98b521, 0x90b9231681c2bc37, 0x37e2b13e6f585b6b, 
0x1d0a34e55e0f369f, + 0x86bb8019cf41447c, 0x4b95c6ef55b3f71f, 0x3b6ed1660732b310, 0x617eee603d137f21, + 0xf4f6278b464f3bbc, 0xdfb763b720da205a, 0x353478899b871cb7, 0xe45fbbff574cc41e, + 0x1a94b60847907d72, 0xb10eef051eff67a5, 0xf0e012ec6a284d40, 0xcc1cd1a11b926d7c, + 0xcf9d9c5453e19cad, 0x270febcc0fc0e86b, 0xd6567568778b781e, 0x7323b98965eeb46b, + 0xccecd374567086ff, 0xef7b44bfc497a704, 0xebc479c051a9f0a5, 0xc9b7410e3e00a235, + 0x1d084f7ecdf83dab, 0xc8a9a97e33ba8ba3, 0x8c75318f5b2350d6, 0xaa3cd5d0c684bdda, + 0xa81125fe0901bedf, 0xf7bcd76020edfc93, 0x834ee4c12e75874f, 0xb2bb8a7beb44fa14, + 0x32cd26f50a4f4e4d, 0x0fc5817ca55d959a, 0xd6e4ae2e3ae10718, 0x074abdcceb8d6e38, + 0xc0cc5f4f9b3a9c43, 0x1115d364363595b2, 0x69861db2eb19f2e8, 0x59b8d804cf92bc67, + 0x9bac9785e5e4b863, 0x7fa0e17a41869561, 0x10d3c9633f0c709c, 0x534a03deee6bc44a, + 0x73b1f7201257f581, 0x46fd6a11e2e0706b, 0x494abb554946e67a, 0xb5d6da317864dc8e, + 0x402ded9238f39687, 0xd8fa37d2cbd6d290, 0xcc818293fcb06791, 0x6482ab344806cd4d, + 0x0956e6ee9d8eb60b, 0x01fee622d8465ac8, 0xae7ece370cbd9c35, 0x7ff09e937a177279, + 0xa2c29ee7a33ca5f1, 0x990e8dbee083923b, 0x4a819b72f610863a, 0xddecfad79d3f08be, + 0x627372480fac20a7, 0x802154d6eca2db4c, 0x8fcf02e42f805e55, 0x040a911ff8cea977, + 0xbb544485bc64d0d4, 0xaddde1aeb406d0fb, 0xf6b35fae23dce66f, 0xc07a9fb3645d2f9b, + 0xccd113907e9c0fed, 0xd17af369984fd213, 0x9223823c59a083e7, 0xe19d475606b81013, + 0xe181ac116a90e57a, 0x71f7b6258c6def4c, 0x2246f34b45964f7c, 0xd74aedaea2d31751, + 0xb1add86e5dd305d1, 0xeb9ba881f16d6471, 0xef7600e036f5c6ff, 0x1d50bc9735b8fb85, + 0xe63942bd1f3e2969, 0x9241ba9f8b3f4e72, 0xee8bb2bca07d35b6, 0x55cd55dab522654e, + 0x94d0cfa7c1a6845d, 0x02f9845d559884c3, 0x8ce70ea21063b560, 0xd70998028ef08b74, + 0xdfdb5bbee310876b, 0x4e21b2e348256d16, 0xde007a981c13debc, 0xe51950cbbddabfdd, + 0xd223301dbe9957c1, 0x084b8634cc2cce4b, 0x90e551378aa9d70c, 0x833b533ac633e448, + 0x7891e232882da57f, 0xa1bf26f0163ce2b3, 0xf33a0171eb9c68d5, 0x2e7de18ca69b3fa2, + 0x666fd6f175619199, 0x1239d37edb5feb9f, 0xfa9fc9382e61ff5c, 0x3ca4ad427e3c126f, + 0x37c6dd4c2c31ae6e, 0x1f1bacb619d427b2, 0x7dd09f5d10759afe, 0xc8d941432327d733, + 0x2b389ba25e1d43a7, 0xa4e3030c3740ff21, 0xcc56dae13fd37463, 0x2481457c175b560f, + 0x9deb35bde77c5c41, 0x847aa6ea5549a0c3, 0xcde01bb48b6e7f02, 0x15a28844e64cb211, + ], + // seed = 1 + [ + 0xecfcba92fe5691a3, 0x71377799fea34699, 0xb284c9096fa614e5, 0x54534170f40de6c8, + 0xbbd804d45884fba3, 0x44929a896388c8a1, 0x79b712508e0fa3b1, 0xeb53ab280af31054, + 0x351ea23a6319da7a, 0x2fbe55d9819d85a2, 0x34f4b6568dcd28b1, 0x8c94ea5e5d82967a, + 0x09068d333a46d3c5, 0x762ad4f64cb73381, 0xd5c6db5ef0e22640, 0x36d8ab5a36175680, + 0xd41fe333cdc3525a, 0xa1f51dbdf20ce781, 0x1410a95e786c8be6, 0x96b7499a670c2b41, + 0x3912e1037835d893, 0x272c5bd83e1e9115, 0x2ea7f91cad82a0d6, 0xcd10e85662ce9931, + 0xedad49be8d5e8b74, 0x7ccd8fe0f37d12bc, 0xfac0482005eed593, 0x4513991681f6c8b0, + 0x2804d612eb0ad37d, 0x7cca9e8412b81d34, 0x85ffd6707192b7b8, 0xea0560aeea954411, + 0x0122d28226102bba, 0xf51c47cdbd22fdd1, 0x3707d851183ff17c, 0xaef5a1465f3e902d, + 0xbcb38c2d8736a04f, 0x4025317e864bef15, 0x8d3f66d86e1ea58f, 0xc16759a3d97ed79a, + 0x1c62abdc0659f2f5, 0x23b3eb4e699bd28f, 0x5083c4fceed3ccaf, 0xa65bf34562cc989c, + 0xaa5865932fd79064, 0xf24d08d268c24593, 0x7fbd00a215196999, 0x7812cd366d752964, + 0x62e8dcb27ef3d945, 0xf08b7984e1b946dc, 0x547d23ad9a5c1dcf, 0x496b1fb249b27fb7, + 0xcd692e1db5f3b3ba, 0x41931e39f1e1bc61, 0x286c6a7d7edae82b, 0x17ef6638b6c4ca6e, + 0x609beb5a2576a934, 0xcc5e16fe4a69b83c, 0xbbd14d08b078fc24, 
0x2a617680f481cb94, + 0x81dbbd5f86e6d039, 0xeb8205e1fc8ecc3c, 0xe5e3bb576faa8042, 0x5d6f1eb9d9df01b5, + 0x9a47b8739c10fb44, 0x398a7caad7ea7696, 0x9c0fc1d7c46adde6, 0x67cd6de0a51978a6, + 0x68ccc4b77a21cca4, 0x1e067066b82f415c, 0xf7ddade6535e1819, 0xf2185c884291751b, + 0xc322b7381fcbe34f, 0x242f593e88290b9b, 0x8e11ccc0ea5e84a3, 0x40e3a2e3346db8a2, + 0xf18bfc3ad2931a2c, 0x2468397394b00144, 0xeae199cce14e6817, 0x05b462686c75a1ae, + 0xda096cb859c51673, 0xd87aeb967a906bef, 0xaabc74493cb02fe6, 0x74d48fc2e7da143e, + 0x6ec1c8fed3f2c1fd, 0xe01e0704b463f18e, 0xc3d88a4d3a8056e4, 0xd01ae0ffab6c8f3f, + 0x881ba052620ae7c7, 0xcea033aef0a823a5, 0x8d2cad91d83df1e3, 0x18746d205e66dbe9, + 0x3061f8e58d046650, 0xd819c59f0ce2cf8b, 0x144e89e93635e870, 0x3415e88279b21651, + 0xd6f7ab944b86c3fa, 0x45f1dd15d0f67bdc, 0xbf0d97c7f4fa24f4, 0x34a7de520a57fcd2, + 0x4ba86fda03e9e2bc, 0xa7995265a025b552, 0x698f6819d5f51cf7, 0xd07dbe9d8a156981, + 0x2683945373857fc1, 0x116f8a84f96167de, 0x8bc832bd85595ebf, 0xb206519d74fdfafa, + 0xde9519b2e9b5cc5f, 0x16fdd6f2da1d8163, 0x7ba32bd48ef56f11, 0x6f4e4d7ee8b29717, + 0xd31576dde7468aad, 0x023bb08848676045, 0xf6dcc083178160b7, 0x42035f426250e683, + 0x343732993cfed89f, 0x0640a870a22d3d58, 0x65cff80b53b4ae6a, 0x27996fa17ab05215, + 0xfd5db01401b21a04, 0x894508784bc1673c, 0x5bfcf43a2380e27d, 0x4cd6dcc2715583b7, + 0xa43b3763e7d4c902, 0x6da83e12ef0c1257, 0xfe80a602b0335aff, 0x293a7d8f4ff344de, + 0xb4ae7c2b8956bf5a, 0x6b45432d38254b4d, 0xd086acbdf15d9455, 0xa4d19e43f41ea87b, + 0xf01f13ba4bb87fbf, 0xca582cf301a299ff, 0x0ddad3d45298fa7d, 0x0646a130459c3999, + 0xc08e3af3747e2cee, 0xfc7db8aa9ed67295, 0x783b329e7bd79d5f, 0x732dbc607957af7b, + 0x8e446ac19fb26555, 0xff1dfa4d61dc89a5, 0xb6fbc46bd8d011d8, 0x185147ec5779f0d7, + 0x6eb2cf6149a5380f, 0xb0e773df803a1eae, 0xc07706c5519bfce5, 0xc35abcf54fa95f14, + 0x40a01d99a38608ea, 0x776dcd6f603c277f, 0x6ae12389b1d6d0bb, 0x8bd981448df92bb9, + 0x426a6a7ca21a2c16, 0x87efd5b71c1bad26, 0x71fb7fc4cd41de48, 0xdd9033c45619d463, + 0x40eaab322654cef7, 0xe077fffed6f3e3a2, 0x375a4dbef9384447, 0x2066b009d2c4a100, + 0xeca4a5794a068447, 0x2128f64bddf341a1, 0x738b4bb1be90bd61, 0x433772cf3813d52e, + 0x9540c88add8e4474, 0x0b6d5decd21d3519, 0x654ead966745642d, 0xe1bfb03c3b4bdb4c, + 0x0b977a9937515b1f, 0x0a4587509ef63870, 0xe89f0de1d9cfd44a, 0x23a91390272e7f68, + 0xd92defbc9096b8d8, 0x004db87174612539, 0xc88ecaabdd1a71f1, 0x050de38393073346, + 0x8af1426d7964e038, 0xf352c4fef8ad5c87, 0x6f26bc7408e26548, 0x0d41543fd9bf3084, + 0xfc4e07553a840fc6, 0x5ef117de86a555a9, 0x1f11c42dffb5ae1b, 0x4147648f07490fa5, + 0x09b35fd7671b21aa, 0x1453b14f7ccca481, 0x944f6fcce4c9b2ba, 0x5b08dd2e3583dc06, + 0xe0220df78dc9c22d, 0x1c200b9506cbf666, 0x8a0b7465eadb523b, 0xfbcb43a91a1e2d80, + 0xe697f44be3c36a58, 0x2f8a8e48fb7e350d, 0x7baba71b8920d55f, 0x10edc0216105bc96, + 0x52db07c79d7a7a63, 0x1916e8cef9452ac3, 0x5cbbbf21f867b6cc, 0xadd583365a690a4b, + 0x4e4ca2c8bffc2fdb, 0xf5fe3416d2eebcfe, 0x839af8b85e452476, 0x8496c0c54ad44e16, + 0x6c46f1ecad4482bf, 0xb794cad76ae18715, 0x67b762eec7c62985, 0x52dc9e68df5b3a53, + 0x0cc7e444b422a5f9, 0xadbfe90841c112b0, 0xfe37b136f0ca5c34, 0xcfe9e47948a8d73e, + 0xee90572b86a30d91, 0x549e72d8262830aa, 0x3361564b469f32c6, 0x1e6eba9e0d2648e2, + 0x5f8e2b2ac5fcb4eb, 0xe4224fa5f71f7cc6, 0x7357a9230c76757b, 0xcad70f74aaf6b702, + 0xeef28ced23894cc2, 0x753fdd3352aefd68, 0x1fed6ba90bbeb9d2, 0x05316f4ab4034b4b, + 0x3396df022b9f63d6, 0x82d7125a7cfd0935, 0x3519a71caf1f87f0, 0xd1dfb7a5cc3974be, + 0xbfae40ecbdbbcc2a, 0x152c11778e08dd54, 0x4a96566a6c848554, 0x3a84d621c340cdd7, + 
0xfd47aa1887e2fb03, 0xa63cae94b2f1d099, 0xed61783f3e5b75e0, 0xefd44864106019be, + 0x145ff78b80b081aa, 0x34670e5fcea9230e, 0x876ef976328db371, 0x4221f3a5269942a6, + 0x95315cbd85c648f4, 0x3ca344dc7c3b1600, 0x38421ea39ff28780, 0x31dbeee967c0435c, + 0x27437c3e268402e7, 0xdd0cf8343312a654, 0x965ab9dad1d8aa29, 0xf871706dd3e23509, + 0xce23d06c7a25e699, 0x1b37d59382b27589, 0x3407f004723d6324, 0x56efb69cdb5deaa1, + 0xf46cdd2b9fd604e0, 0xcad3ca79fdac69bd, 0x7252802a574e63cb, 0xc281fb8acc6ec1d3, + ], + // seed = 2 + [ + 0xdd16cb672ba6979c, 0x3954eaa9ec41ae41, 0x52cb802771d2966d, 0xf57ed8eb0d0294f2, + 0x768be23c71da2219, 0x6131e22d95a84ad3, 0xd849e4e49bb15842, 0x18e8e5c4978cf00d, + 0x3af5e5867ce1f9bd, 0x06c75a9fffe83d63, 0xe8de75a00b58a065, 0x0a773251bc0d755a, + 0x629dc21e54548329, 0x2a168f5e5a883e70, 0x33547375f0996c86, 0xdfcb4c7680451322, + 0x55c1ecaaaa57e397, 0x4546c346c24f5a31, 0x6f8f0401dfabc86c, 0x7760d2d36ee340b4, + 0xf6448e48bdeb229d, 0xba70e1633b4dba65, 0x069cda561e273054, 0xa010b6a84aebf340, + 0x5c23b8229eee34b6, 0xea63c926d90153af, 0x7d7de27b3e43ec1b, 0xea119541eddc3491, + 0xf1259daeddfc724c, 0x2873ca9a67730647, 0xa1e7710dade32607, 0x758de030b61d43fd, + 0xd2c9bcbfa475edb4, 0x18ade47bb8a0aa29, 0xf7a74af0ff1aea88, 0x6f8873274a987162, + 0x6963e8d876f4d282, 0xd435d4fe448c6c5b, 0x93ec80ba404cafff, 0xcf90d24c509e41e7, + 0x5f0fc8a62923e36e, 0x9224878fe458f3a4, 0xd9a039edf1945bcd, 0x0877d1892c288441, + 0x75205491f4b4740b, 0x30f9d2d523a9085b, 0x4b7f4029fa097c99, 0x170bb013745709d4, + 0x7087af537f11ef2e, 0x28c62b88e08fc464, 0x84bbcb3e0bb56271, 0x485a4b099165c681, + 0x357c63357caa9292, 0x819eb7d1aee2d27e, 0xdaa759eb9c0f8c9d, 0x42cdc36729cc3db5, + 0x9489aa852eddbb06, 0x8161e4f85a84e6d4, 0xa964863fdad3eb29, 0xcc095ddbce1a6702, + 0x3ecfadbb8dc2ce58, 0x971316509b95a231, 0xc8f484d1dbc38427, 0xae9c510c463574c0, + 0xdf2b31179600c21a, 0x440de87bada4dfa3, 0xbd8d30f3f6fb7522, 0x84e6d7f678a0e2d0, + 0x0ec4d74323e15975, 0xf6947610dad6d9ab, 0x73a55a95d73fe3a5, 0x3e5f623024d37eda, + 0x8d99a728d95d9344, 0x8b82a7956c4acdc4, 0x7faeaea4385b27f6, 0x540625ff4aa2ff21, + 0x4aa43b3ebd92ce2b, 0x899646a6df2da807, 0x49225115780942d7, 0xe16606636af89525, + 0xb980bcf893888e33, 0xf9ed57695291b0d8, 0x5c6dd14464619afa, 0x50606d69b733d4f3, + 0x7fb1af465b990f97, 0x3fab2634c8bbd936, 0x556da6168838b902, 0x0f15975902a30e1f, + 0xb29d782ae9e1991f, 0xae00e26ff8f7e739, 0xd3da86458bb292d5, 0x4528ee0afb27e4ce, + 0x49882d5ba49fabad, 0x7e873b6a7cf875ee, 0x777edd535113c912, 0x94ed05e7ff149594, + 0x0b8f95fc4211df43, 0x9135c2b42426fef2, 0x411e6c2b47307073, 0x503207d1af0c8cf8, + 0xd76f8619059f9a79, 0x64d24617855dee45, 0xf7bc7a877923196a, 0xd6cc42ed6a65be79, + 0xe3912ff09d4fc574, 0x4192d03b2bc2460a, 0xa0dcc37dad98af85, 0xfc59049b2a5818a4, + 0x2128bae90a5b975f, 0xbe7067ca05ea3294, 0x5bab7e7753064c4f, 0x42cbf0949ef88443, + 0x564df4bbd017492c, 0xf2c2eb500cf80564, 0x5b92e67eb00e92af, 0x8c4103eef59c0341, + 0x83412122b8284998, 0x888daf2da0636b6d, 0x4d54b10303dd07d6, 0x201190e7c1e7b5ed, + 0x3797510bb53a5771, 0x03f7bc598b570b79, 0xdc1e15d67d94f73e, 0x721e8b499ebe02c1, + 0x71f954f606d13fa0, 0x0c7a2e408c168bf0, 0x07df2ef14f69c89d, 0xe295096f46b4baaf, + 0x7a2037916438737e, 0xd1e861aeaf8676ea, 0xb36ebdce368b8108, 0xb7e53b090ddb5d25, + 0x5a606607b390b1aa, 0x475e52994f4a2471, 0xbcc2038ba55b2078, 0x28b8a6b6c80df694, + 0xb5f0130ec972c9a2, 0x7a87cd2a93276b54, 0x4d0eec7ecf92d625, 0xac1a8ce16269a42e, + 0xa4ca0237ca9637b8, 0xd8dc8ff91202b6ff, 0x75b29846799d7678, 0x761b11a5edd9c757, + 0xf2581db294ef3307, 0xe3173c2b6a48e20f, 0xe46fd7d486d65b3c, 0x1352024303580d1f, + 
0x2d665dae485c1d6d, 0x4e0905c825d74d3b, 0x14ff470c331c229e, 0xbdc656b8613d8805, + 0x36de38e396345721, 0xaae682c1aa8ff13b, 0x57eb28d7b85a1052, 0xf3145290231d443a, + 0xd0f68095e23cbe39, 0x67f99b3c2570b33d, 0x54575285f3017a83, 0x9b2f7bb03d836a79, + 0xa57b209d303367a9, 0x7ccb545dd0939c79, 0x1392b79a37f4716d, 0x6e81bb91a3c79bcd, + 0x2c2cd80307dddf81, 0xb949e119e2a16cbb, 0x69625382c4c7596f, 0xf19c6d97204fb95c, + 0x1b2ea42a24b6b05e, 0x8976f83cd43d20ac, 0x7149dd3de44c9872, 0xc79f1ae2d2623059, + 0xca17a4f143a414e1, 0x66d7a1a21b6f0185, 0xed2c6198fe73f113, 0x16a5f0295cbe06af, + 0x5f27162e38d98013, 0xf54d9f295bdc0f76, 0x9ba7d562073ef77b, 0xa4a24daaa2cfc571, + 0x49884cf486da43cd, 0x74c641c0e2148a24, 0xbff9dcbff504c482, 0xf8fc2d9403c837ab, + 0x6ccc44828af0bb1e, 0xbcf0d69b4c19dfdb, 0x8fe0d962d47abf8f, 0xa65f1d9d5514271d, + 0x26ff393e62ef6a03, 0xc7153500f283e8fc, 0xea5ed99cdd9d15cd, 0xfc16ac2ba8b48bb7, + 0xf49694b70041c67a, 0xbd35dd30f5d15f72, 0xcf10ad7385f83f98, 0x709e52e27339cdc2, + 0xe9505cb3ec893b71, 0x2ffa610e4a229af7, 0x12e1bc774d1f0e52, 0xe301a3bb7eacccc8, + 0x1fdd3b6dcd877ebf, 0x56a7e8bda59c05aa, 0x99acd421035d6ab4, 0xfd21e401cecd2808, + 0x9a89d23df8b8d46f, 0x4e26b1f1eb297b9c, 0x9df24d973e1eae07, 0xe6cdc74da62a6318, + 0xfc360d74df992db0, 0xf4eca0a739514c98, 0x481c515ba9bf5215, 0xce89cce80f5f3022, + 0xf487a10fc80e4777, 0x235b379a87e41832, 0x76f72e028371f194, 0xd044d4a201325a7d, + 0x47d8e855e0ffbdde, 0x268ae196fe7334b0, 0x123f2b26db46faa8, 0x11741175b86eb083, + 0x72ee185a423e6e31, 0x8da113dfe6f6df89, 0x286b72e338bbd548, 0xa922246204973592, + 0x7237b4f939a6b629, 0x31babda9bedf039a, 0xb2e8f18c6aeec258, 0x0f5f6ce6dd65a45e, + 0x8f9071a0f23e57d3, 0x71307115ba598423, 0xcbe70264c0e1768c, 0x1c23729f955681a8, + 0xfbc829099bc2fc24, 0x9619355cbc37d5d6, 0xea694d4e59b59a74, 0xb41cf8d3a7c4f638, + 0xae1e792df721cd0b, 0x7cd855d28aac11f6, 0xca11ba0efec11238, 0x7c433e554ce261d8, + 0xe3140366f042b6ba, 0x8a59d68642b3b18c, 0x094fcdd5d7bccac2, 0x9517d80356362c37, + 0x4a20a9949c6c74e8, 0xc25bcf1699d3b326, 0xa8893f1d1ed2f340, 0x9b58986e0e8a886e, + 0x29d78c647587ce41, 0x3b210181df471767, 0xd45e8e807627849d, 0x1ec56bc3f2b653e3, + 0x974ff23068558b00, 0xdb72bdac5d34262c, 0x23225143bb206b57, 0xd0a34cfe027cbb7e, + ], + // seed = 3 + [ + 0x39209fb3eb541043, 0xee0cd3754563088f, 0x36c05fc545bf8abe, 0x842cb6381a9d396b, + 0xd5059dcb443ce3bf, 0xe92545a8dfa7097e, 0xb9d47558d8049174, 0xc6389e426f4c2fc0, + 0xd8e0a6e4c0b850d3, 0x7730e54360bd0d0d, 0x6ecb4d4c50d050d5, 0x07a16584d4eb229f, + 0x13305d05f4a92267, 0xb278ddd75db4baec, 0x32381b774138608f, 0x61fe7a7163948057, + 0x460c58a9092efee6, 0x553bf895d9b5ff62, 0x899daf2dabfd0189, 0xf388ab9c1c4b6f70, + 0xd600fe47027ea4cd, 0x16d527ec2b5ef355, 0x5ac1f58ff6908c81, 0xa08d79ff8ee9ffe8, + 0xc1060a80b7a5e117, 0x14b2c23118c60bda, 0x8cc0defbb890df8f, 0xe29540fd94c6d28b, + 0xa604f003f82d5b71, 0xa67583d4eb066d18, 0xd62cbd796322b3fc, 0x070cfe244cdcccf3, + 0x73557c30b3af47e5, 0x2e544e31153a2163, 0x996eef7464d5bead, 0xbc71cb5ab0586cdc, + 0x0bfcb6c1b517ed69, 0x62b4f1fcc82e8ca0, 0x0edbc68f544965c5, 0x40fa39baa24af412, + 0xf39aeb2413dab165, 0x17e6013e7afee738, 0x8109bff1c8d42a9d, 0x3cd99863390989b5, + 0x02021a4cc9c336c8, 0xa06060778cb60aa4, 0xd96591db60bc1e06, 0xd2727175183f4022, + 0xcdc1f1c5bce3e7ce, 0xb393ccc447872a37, 0xdf6efe63257ead3a, 0x20729d0340dbceb6, + 0x9f3d2d26fc0ea0d7, 0xf392e0885189bd79, 0xdf2ee01eb212b8b6, 0x6e103a0c0f97e2c3, + 0x96c604a763bd841b, 0x9fc590c43bba0169, 0xf92dcd5ddc248c40, 0x113a8b54446941dc, + 0x5943eda146b46bb8, 0xbf657901a36a39a7, 0x5a4e0e7ea6568971, 0xb94c635bae9f9117, + 
0x2626fb65b3a4ef81, 0xa59bfd5478ce97de, 0x79112ba9cc1a1c63, 0xf41f102f002cf39c, + 0x0a589bcbfb7ff1c8, 0xa1478c53540c4fa1, 0x60d55e72c86dfaca, 0x312e7b6840ea7a39, + 0x8aae72dcccfe1f75, 0xff2f51f55bf0247a, 0x3c2e4b109edb4a90, 0x5c6d73f6525c7637, + 0xe49acb04a199f61c, 0x27860642d966df7f, 0x541ce75fb1e21c30, 0xd9fcd6f90806c7cc, + 0xb87c27bc93a7969b, 0x92f77a1179b8f8dc, 0xb1f29379deb89ed4, 0x7e63ead35808efe7, + 0x13545183d7fa5420, 0x575f593e34cf029d, 0x27f1199fb07344ae, 0xe67f95f7dc741455, + 0x49b478b761ab850b, 0xd7bedf794adfc21e, 0xdc788dcd2dda40ae, 0x14673eb9f4d8ad35, + 0x0cced3c71ecf5eb1, 0xe62d4e6c84471180, 0xdfe1b9e2cb4ada7d, 0x70185a8fce980426, + 0x0ce2db5e8f9553d6, 0x1fedc57bb37b7264, 0xb9310a2e970b3760, 0x989ff8ab9805e87d, + 0x0b912d7eb712d9ee, 0x1fe272830379e67c, 0x16e6a73aff4738fb, 0xeed196d98ba43866, + 0x7088ca12d356cbe2, 0x23539aa43a71eee0, 0xed52f0311fa0f7ad, 0xa12b16233f302eea, + 0xc477786f0870ecb4, 0xd603674717a93920, 0x4abe0ae17fa62a4c, 0xa18f1ad79e4edc8d, + 0xc49fe6db967c6981, 0xcc154d7e3c1271e9, 0xdd075d640013c0c0, 0xc026cd797d10922a, + 0xead7339703f95572, 0x4342f6f11739eb4b, 0x9862f4657d15c197, 0x4f3cb1d4d392f9ff, + 0xe35bffa018b97d03, 0x600c755031939ad3, 0xb8c6557ffea83abf, 0x14c9e7f2f8a122ea, + 0x0a2eb9285ee95a7c, 0x8823fec19840c46f, 0x2c4c445c736ed1d0, 0x83181dff233449f1, + 0x15ed3fca3107bef5, 0x305e9adb688a4c71, 0x7dbef196f68a3e2e, 0x93e47ece3e249187, + 0x8353c5e890ead93c, 0xea8a7ae66abafdf7, 0xf956dbb6becf7f74, 0x9f37c494fbfdb6e4, + 0x11c6cbaa2485dd32, 0x206f336fcca11320, 0x9befe9a59135d8fe, 0x5f3ef8b8db92c7db, + 0xbb305e556ce0ce9a, 0xf26bdafb1305887f, 0xcbf28abe23f08c61, 0x0bc64173b914e00b, + 0x9168da52e983f54a, 0x6ea41d09c3574a3e, 0x78aa44d4a74459ae, 0x2931422878387bf5, + 0x018f64a3a92c2d9c, 0x9be43f6752e66b34, 0xae378890decd1152, 0x07325329a1cb7623, + 0x3b96f4ee3dd9c525, 0x2d6ebcdbe77d61a3, 0x10e32b0e975f510c, 0xffc007b9da959bf9, + 0x38bf66c6559e5d90, 0xbe22bdf0bf8899fe, 0x87807d7a991632a8, 0x149a0d702816766a, + 0x026f723db057e9ab, 0xeeecb83625ec6798, 0xcec2ed5984208148, 0xd985a78e97f03c84, + 0xf96c279e7927b116, 0x99d5027b3204f6e2, 0x13a84878c3d34c55, 0x5cf5ec96229e9676, + 0x0bc36b07e4f8e289, 0xbed33b80a069914d, 0x2fbfbdd1ff4b9396, 0xab352bb6982da90f, + 0x154d219e4fa3f62b, 0x4d087512bb6b9be7, 0xc582e31775ee400e, 0x7dadb002ae8c4a4e, + 0xaae2957375c1aee2, 0x5f36ca643356625b, 0xf87cf8eb76e07fb7, 0x46f432a755e02cc3, + 0x36087e07aba09642, 0xe5642c1e4ebb9939, 0xb9152d22338eefad, 0xf7ba44278a22cf7f, + 0xd3b8013502acd838, 0x7761511da6482659, 0xb0857621638e8e50, 0x552eddb4a8b1d5f5, + 0xc43d9861e812c3ea, 0xd765c2aada47910c, 0x21c935b68f552b19, 0x6256d5641a2b47dc, + 0xab711d8e6c94bc79, 0xa8d0b91a2a01ab81, 0x5e6d66141e8d632a, 0x7638285124d5d602, + 0x794876dbca3e471f, 0x951937d8682670ce, 0x0f99cb1f52ed466a, 0x8c7cd205543b804c, + 0x2fd24d74a9c33783, 0xe5dcb7b7762e5af1, 0x45e6749cca4af77c, 0x540ac7ee61f2259f, + 0x89c505c72802ce86, 0xeab83b9d2d8000d1, 0x9f01d5e76748d005, 0xc740aaef3035b6d0, + 0x49afcd31d582d054, 0xcba5dc4c1efb5ddc, 0xc0a4c07434350ca1, 0xfc8dfaddcc65ee80, + 0x157c9780f6e4b2d9, 0x9762a872e1797617, 0xc4afae2cf3c7e1bd, 0x71cde14591b595d4, + 0x8843c3e0e641f3b9, 0xd92ecd91dce28750, 0x1474e7a1742cb19f, 0xec198e22764fa06b, + 0x39394edb47330c7d, 0x00ba1d925242533d, 0xaed8702536c6fb30, 0x6d3618e531c2967a, + 0x77f7cedcd7cc0411, 0xbc1e2ab82be5b752, 0x07b0cf9223676977, 0x596c693b099edd53, + 0xbb7f570f5b9b2811, 0x96bfdad3c4a6840c, 0x668015e79b60c534, 0x3ad38d72123f1366, + 0x6b994d81d2fcbb09, 0x70885f022c5052d8, 0xc891ee79d9306a7b, 0x2c4df05c0ed02497, + 0x19ebc13816898be2, 
0xea7c64df11c392a2, 0xb7663e88dd12e1bd, 0x79f768cb8e154c21, + 0x1fb21b12e945933b, 0xe6a9045643f6906e, 0x544c47acd7e15371, 0xb7709b14f727e3d1, + 0x326ee36a46942971, 0x477f1cf7b0e2d847, 0x88b8f6b82b3b0c24, 0x18bc357b80e3cd5c, + 0x3333de70e4d66e0b, 0x4fd4c5e148583cf6, 0xae1b62f3008c0af3, 0xc49f419b6ab29cf5, + 0x2c29fa65afc3fa28, 0x4b19d93734d03009, 0x7dd6c09e589276ad, 0x1cece97f30de48ad, + ], + // seed = 4 + [ + 0x58bdf4338602e4fb, 0x71a5620b02c926d5, 0x3811c960129c2d9f, 0x29c2fb11fccac567, + 0x0d6b1ea7780f1352, 0xcc4d3ddfae3f87b3, 0xfdd30257362a586b, 0xabc948fde69f25f1, + 0x51b3523469d30f7b, 0xe0f0322724405ace, 0xd3729266d896da1e, 0xb10c37e5147915bf, + 0x8b577039f9fa32a3, 0xe677c6a9cbfb44b3, 0x7317a756ebb51a03, 0xf8e988ef37359485, + 0x600fc1ef3f469ff3, 0xbf0b8f8520444e01, 0x3711168b08b63d73, 0x34146f2944a6cb36, + 0x717feb263862cdde, 0x7185f8347db00412, 0x900798d82127e693, 0x84089e976a473268, + 0x10f8308c0d293719, 0xf62a618d4e5719b8, 0x8bdbd257a1a9516f, 0xf49f666fd7a75110, + 0xbaf45e2db7864339, 0xe4efa1ea0c627697, 0x3e71d4c82a09fe10, 0x54a2a51cf12127bb, + 0xa0592c9f54ba14cd, 0x27dd627a101c7a42, 0x3d2ceb44b3d20d72, 0x7ee1f94a68ca8f5d, + 0x7e8cb8651b006c36, 0xbd9fa7ca3a475259, 0x856de173586a7b34, 0xcedb291b594cb1b5, + 0xa3d6e462fd21cddc, 0x74561d10af9118e4, 0x13a3d389fc2d4b36, 0xeea8594a4a054856, + 0xf56d7474d9ba4b13, 0x25ddce2f6490b2fd, 0x920653ff3a8d830b, 0xcd8c0c9cdac740d1, + 0x2c348a738db9c4a0, 0x2967ccbe8ea44c22, 0x47963f69adb049f8, 0xf9d01eb5b4cf7eb6, + 0x7a5c26eb63a86bd2, 0x62ad8b7a71fa0566, 0xb373213179f250ae, 0x589d4e9a88245a4d, + 0x433dafebe2d558a8, 0x521fbef2c8fe4399, 0x62a31f9ff9ccd46b, 0x51602203eba7c1a6, + 0x9afc8c451b06c99f, 0xb529085bdbaffcea, 0xac251825cc75892b, 0x94976a5bce23d58e, + 0xdd17925b6c71b515, 0x568fd07a57bce92e, 0xefac31200d8bd340, 0x716c3e466b540ef9, + 0x3d2c9e380063c69b, 0x14168f9a3662dd83, 0xd298c7504dbc412f, 0x74490a94f016719f, + 0x0e0da431e1ab80c8, 0xe321f63dc6b169ae, 0xf08671544febc95a, 0x39324450cc394b3b, + 0xea6e3d35f1aa3a70, 0x8ef8a886508ce486, 0xdc1a631ef0a17f06, 0xfda2b3fbcd79e87b, + 0xd75bcae936403b10, 0xf88b5bd9f035f875, 0xc43efec2e3792dd4, 0xe9fac21a9d47cd94, + 0xc2876f0c4b7d47c3, 0xaba156cf49f368b4, 0x5ccda2170fa58bf9, 0xadc92c879ed18df7, + 0x110c1b227354e6c8, 0x298ee7a603249200, 0xde92142ede0e8ee7, 0x88e4a4610644ba9e, + 0xbb62d277e7641d3a, 0xb9be1985b7bf8073, 0x29024e5426cdb0d1, 0xf6aefd01f3092ab8, + 0x2a07087b313133aa, 0x6d71f445d6dfc839, 0x1e2412ff12e5526b, 0xed5cdeba6617b9e1, + 0x20b1d0d5e5f8760e, 0x12ff15705c368260, 0x7bf4338b7c387203, 0x34ff25f00cd06185, + 0x1148c706c518cf28, 0x5c04f0623388f025, 0xcb9d649275d87d79, 0x9b5f0c24fabc42ec, + 0x1a7b5e7964e33858, 0x2a81bbd8efdc6793, 0x8d05431ffe42752e, 0x83915cd511002677, + 0x580ed4d791837b31, 0x5982e041d19ff306, 0xcad0d08fa5d864ca, 0x867bee6efe1afa63, + 0x26467b0320f23009, 0xd842414dfda4ec36, 0x047fcdcbc0a76725, 0xbddb340a3768aeca, + 0xef4ce6fa6e99ab45, 0x88c5b66c7762bf9b, 0x5679f1c51ffb225d, 0xdab79048317d77ee, + 0xf14e9b8a8ba03803, 0xe77f07f7731184c1, 0x4c2aab9a108c1ef5, 0xa137795718e6ad97, + 0x8d6c7cc73350b88b, 0x5c34e2ae74131a49, 0xd4828f579570a056, 0xb7862594da5336fc, + 0x6fd590a4a2bed7a5, 0x138d327de35e0ec1, 0xe8290eb33d585b0b, 0xcee01d52cdf88833, + 0x165c7c76484f160e, 0x7232653da72fc7f6, 0x66600f13445ca481, 0x6bbdf0a01f7b127d, + 0xd7b71d6a1992c73b, 0xcf259d37ae3fda4a, 0xf570c70d05895acf, 0x1e01e6a3e8f60155, + 0x2dacbb83c2bd3671, 0x9c291f5a5bca81af, 0xd976826c68b4ee90, 0x95112eec1f6310a2, + 0x11ebc7f623bc4c9a, 0x18471781b1122b30, 0x48f7c65414b00187, 0x6834b03efa2f5c30, + 0x0875ef5c2c56b164, 
0x45248d4f2a60ba71, 0x5a7d466e7f7ba830, 0x2bebe6a5e42c4a1d, + 0xd871d8483db51d10, 0x6ee37decd2fd392f, 0x7d724392010cede3, 0x8e96ef11e1c9bcc8, + 0x804a61d86b89d178, 0xbb1b83ce956055ec, 0xcb44e107410ff64f, 0xc426bb09ee0ba955, + 0x057c08f42c3dd7f1, 0x40ea1ec148602bdf, 0xc24688deeb65d7f1, 0xd8bcc53c768ba4e4, + 0x16e0e3af65c1106c, 0xfc12f7e7d647218b, 0x70d6e1d3ee93cef4, 0x01d2a505c4541ef9, + 0x1ef79e16e764d5c3, 0x0363d14d13870b98, 0xb56ef64345d06b11, 0xe653d557ebb7c346, + 0x8304a8597c2b2706, 0x1536e1322ce7e7bb, 0x525aec08a65af822, 0x91f66d6e98d28e43, + 0xe65af12c0b5c0274, 0xdf6ae56b7d5ea4c2, 0x5cef621cedf3c81c, 0x41e8b1ffd4889944, + 0xb5c0f452c213c3e5, 0x77af86f3e67e499b, 0xe20e76ea5b010704, 0xbdc205ab0c889ec0, + 0xc76d93eb0469cd83, 0x17ac27f65cab0034, 0xd49ec4531fd62133, 0x07a873ea2f1b9984, + 0xbff270dfef0032ee, 0x1764dbe91592f255, 0xe40363126f79e859, 0xa06cad3ab46971f6, + 0x0be596e90dedd875, 0x3387cce5c1658461, 0x44246acf88a9585e, 0xe0ad82b92d5ecb2c, + 0x2177491c9a1600a6, 0x16e7c4aac0f02422, 0x75792eeeec15c4e1, 0x2309cd359d08ee30, + 0x7cd9831dd1b83b0a, 0x374914a7c4ee8cf0, 0x0dd17765c9ac2e54, 0xb7847470ba9a7688, + 0xfba4f4bbe2991173, 0x422b203fc3de040e, 0x63bfcaf2ecf2ab0e, 0x0c5559f3a192946e, + 0xfdf80675c1847695, 0xf5f570accab842c9, 0x65cc5a448767afea, 0x1efeb0a7ee234f2f, + 0x9b05f03d81e7b5d2, 0xe7c31317a8626cf4, 0x620f2a53081d0398, 0x1b6de96cdd9943ae, + 0x8c226a436777d303, 0xa08fbbd50fafb10d, 0x6a64c5ec20104883, 0x9c9c653502c0f671, + 0x678a02b2174f52a0, 0x68e008ba16bbad4b, 0xa317c16d2efb860f, 0xeab2075d17ed714c, + 0x565eeeddf0c4ea15, 0x8ec8e94d242a6c19, 0x139e8e27d9000fae, 0xc977a7ff1b33d2f5, + 0x1d0accca84420346, 0xc9e82602cd436e03, 0x6a2231da53d2ccd3, 0xb44b12d917826e2a, + 0x4f4567c6a74cf0b9, 0xd8e115a42fc6da8f, 0xb6bbe79d95742a74, 0x5686c647f1707dab, + 0xa70d58eb6c008fc5, 0xaaedc2dbe4418026, 0x6661e2267bdcfd3d, 0x4882a6eda7706f9e, + 0xf6c2d2c912dafdd0, 0x2f2298c142fd61f9, 0x31d75afeb17143a8, 0x1f9b96580a2a982f, + 0xa6cd3e5604a8ad49, 0x0dae2a80aad17419, 0xdb9a9d12868124ac, 0x66b6109f80877fac, + 0x9a81d9c703a94029, 0xbd3b381b1e03c647, 0xe88bc07b70f31083, 0x4e17878356a55822, + ], + // seed = 5 + [ + 0xb3c58c2483ad5ead, 0x6570847428cdcf6c, 0x2b38adbf813ac866, 0x8cb9945d37eb9ad3, + 0xf5b409ec3d1aed1c, 0xa35f4bffc9bb5a93, 0x5db89cde3c9e9340, 0xff1225231b2afb2b, + 0x157b0b212b9cc47d, 0xf03faf97a2b2e04d, 0x86fdab8544a20f87, 0xfcb8732744ae5c1c, + 0xd91744c0787986d5, 0x5f8db2a76d65ad05, 0xcff605cbed17a90d, 0xf80284980a3164e7, + 0x59cc24e713fccc7d, 0x268982cada117ce4, 0xcd020e63896e730e, 0xe760dc46e9fe9885, + 0x6aaece8ab49c6b5d, 0x7451194d597aae3e, 0x35d4385900332457, 0xa40fb563a096583d, + 0xa797b612f7f11b76, 0x2fed6eb68e6a2b9b, 0x2f06ee64aeffd943, 0x9dd0e49d9ca45330, + 0x97d48f08bd7f1d8f, 0x1cfa7fe3ebe4d8ee, 0x2a2ba076bd397d42, 0x68c4344f7472f333, + 0xce21ec31987d74b5, 0xb73dabdc91d84088, 0x801aadee592222fe, 0xaf41345398ebc3f5, + 0x8a8f653d7f15ee46, 0xce2d065ff2ba2965, 0x4e05da515da2adb7, 0xa6dbdb8aa25f0fd4, + 0xca9f9666bbd2d5a9, 0x6b917ce50bd46408, 0x1550cc564ba6c84d, 0xb3063ae043506504, + 0x84e5f96bb796653d, 0xe2364798096cf6e3, 0x3b0dfedf6d3a53d0, 0xb7e4c7c77bde8d93, + 0xe99545bac9ab418a, 0xa0e31f96889507bb, 0x883c74f80c346885, 0xf674ae0b039fd341, + 0x8bb6ce2d5e8d1c75, 0x0c48737966a7ed7c, 0x04fcdf897b34c61c, 0xe96ac181bacbd4d6, + 0x5a9c55a6106a9c01, 0x2520f020de4f45d3, 0x935730955e94d208, 0xce5ad4d7f3f67d3b, + 0xa4b6d107fe2d81ca, 0x4f0033f50ae7944e, 0x32c5d28dd8a645a7, 0x57ce018223ef1039, + 0x2cbab15a661ab68e, 0x6de08798c0b5bec2, 0xee197fb2c5c007c6, 0x31b630ac63e7bda2, + 0xab98785aefe9efe3, 
0xa36006158a606bf7, 0x7b20376b9f4af635, 0xa40762fdc3c08680, + 0x943b5faffd0ebee2, 0x7f39f41d0b81f06e, 0x7c4b399b116a90f8, 0x24e1662ac92bc9f3, + 0xcf586fc4e8e6c7db, 0xe46e0d047eeb12d7, 0xe8021076e4ea9958, 0x11fc13492e3ca22a, + 0xd61eae01410397e3, 0x7e8c4a58036a8e9f, 0x068a6de267970745, 0x64faab129bef1a41, + 0xb4a6f720943dad01, 0x631491058d73a9d5, 0xdad4fe95eab3ec02, 0x0a8b141c5c3a44f6, + 0x9fc69d4c2b335b98, 0x94d5f84a07d6e4cd, 0x1b73965de143c608, 0x443932c2dda54bcc, + 0x7397818fb0b04cd2, 0xef4ab03a1202b277, 0xf3d2ee459c0c2b92, 0x182d4daf8b058a87, + 0x90e63035d7b51368, 0xba4cd8b9a95d45fd, 0x12a7392c76731090, 0x890d264ec5d082d2, + 0xeeaf5c363da4994e, 0xd6aad756902123fb, 0xb531ebebdb28f191, 0xe71ce659fc59babd, + 0x37c1b94f63f2dcb5, 0xe4e3abeb311f9b96, 0x4a31b72ccb8695d3, 0x52cae1f0629fdce4, + 0xe5b0475e2ed71369, 0x2724e8c3506414fb, 0xbab0367920672deb, 0x0161a781c305449f, + 0x37b70f40f5bb60be, 0xddd1094c50251a01, 0x3b28283afd17224e, 0x06dec0cfe889fc6b, + 0x47608ea95bb4902d, 0xad883ebc12c00e82, 0x9e8d7ae0f7a8df29, 0xa79443e9f7c013a1, + 0xcfa26f68b7c68b71, 0x33ae6cc19bda1f23, 0xd9741e22b407887f, 0xf2bff78066d46b1c, + 0x794123191c9d32d4, 0x56cb6b903764ec76, 0x98775d0ef91e1a5a, 0xae7b713bc15c1db9, + 0x3b4c1a7870ed7a0d, 0x46666965f305cc34, 0x0ea0c3b2e9c6b3cd, 0x4dc387039a143bff, + 0x5f38bb9229ef9477, 0xea5d39ba72af7850, 0x69a5ed0174ce2b6d, 0x06969a36bfe7594d, + 0x0adee8e4065ccaa3, 0x908a581d57113718, 0x64822d6c5a8190ed, 0x8c5068b56ace4e4c, + 0x88ba3b4fb4e30bef, 0xa6ec0b8bb5896cfe, 0x4e23fcc6b47996fd, 0xe18e75b0dd549c7a, + 0xcd90f17e106cf939, 0x1666fdfb2ef7c52f, 0x4fae325f206dd88c, 0xe7bc1160e25b062d, + 0x3cc999cb246db950, 0xc5930a7326cd5c37, 0xb008a48a211367bd, 0xc5559da145a88fd4, + 0x1e3ad46655fac69c, 0x7834266b4841bfd7, 0xa764450fbffc58cc, 0x54d8cf93a939c667, + 0x93c51f11b21b2d9d, 0x0964112082ed65cc, 0x4c2df21213e7fb03, 0xf0405bc877468615, + 0x17b4fc835d116ab4, 0xa6b112ae5f3cb4ef, 0x23cfc8a7fd38a46e, 0x8e0a360dc2774808, + 0x24ca9c8092105ad5, 0xafd3f75524f2e0d5, 0x4f39ed7dbaddc24c, 0xe5e362c7679a7875, + 0x00914a916b07b389, 0xdfe1119b7d5ab5da, 0xabd6ed9940e46161, 0x630ed2044171e22c, + 0xdecc244157dd1601, 0x777e6d5b4b4868d5, 0x9b3530bee67017d8, 0xd2faf08b291fdcb9, + 0x006e99455d6523de, 0xd559b5817f6955b5, 0xefcc1063b0088c61, 0xed73145ae0f00ae7, + 0xab2af402cf5b7421, 0x897767f537644926, 0x26c9c0473ca83695, 0x192e34e1881b2962, + 0xf7cf666ec3b3d020, 0x27f9b79c7404afb7, 0xe533e8bed3010767, 0xe5817838e11d05d3, + 0x65659c531bd36517, 0xd427c5e0a23836fd, 0xf3eab7ea58fa3528, 0x07683adae1289f35, + 0x201d6af7e896dd32, 0xd5da938b9a21ad88, 0x843fb73ad67bc316, 0x1782ec7d5feef21b, + 0x943f66f6ec772877, 0x7e9112e7b26da097, 0xeac8161f8663c2c7, 0xe8600db480a9ebf4, + 0x07807fc90f6eaf5f, 0xe0e4c9deb41abf83, 0xbdf533db271f9c15, 0xb398411b0497afe2, + 0xdebb45ef25448940, 0xe7a5decefcd376c4, 0xaf1ef3c728c83735, 0xb8b83a99355cb15a, + 0x6444a0344f1611e4, 0xe8bb7f5cf3c60179, 0x77ab5c5177e75ff7, 0xc38fd6fa849d585d, + 0x390d57d53029060a, 0xa66327eb7b8b593c, 0x6350a14f6fcd5ac9, 0x2c08125bcd7008b4, + 0x2d00c299a6a6bf8e, 0x6b0039c1f68d1445, 0x0035150c5d06f143, 0xa34d01628cc927e1, + 0xdf5b3164d7b2ede1, 0x8167db1d0583d72e, 0x4e13b341cd2ae8bc, 0xa693d9b1f416e306, + 0xc15ed7ca0bc67609, 0xdc344313c1c4f0af, 0x88b6887ccf772bb4, 0x6326d8f93ca0b20e, + 0x6964fad667dc2f11, 0xe9783dd38fc6d515, 0x359ed258fa022718, 0x27ac934d1f7fd60a, + 0xd68130437294dbcc, 0xaf5f869921f8f416, 0x2b8f149b4ab4bf9f, 0xc41caca607e421cb, + 0x7746976904238ef9, 0x604cb5529b1532f0, 0x1c94cd17c4c4e4ab, 0xe833274b734d6bbe, + 0xe9f1d3ef674539ce, 0x64f56ed68d193c6a, 
0xe34192343d8ecfc1, 0xcb162f6c3aa71fe8, + 0x99eaf25f4c0f8fa4, 0x92f11e7361cb8d02, 0xb89170cddff37197, 0x4f86e68a51e071e3, + 0x31abf6afd911a75b, 0x6d20cf259c269333, 0x4150b9f88fcb6513, 0x705063989ebf7451, + 0x559231d927c84410, 0x1ca8ec4b098bc687, 0xebed22405c9180e0, 0xaa815b37d052af59, + ], + // seed = 6 + [ + 0x946ac62246e04460, 0x9cebee264fcbc1ae, 0x8af54943a415652b, 0x2b327ed3b17b8682, + 0x983fde47b3c3847e, 0x10a3013f99a2ad33, 0x6e230bb92d2721ef, 0x1cf8b8369e5c5c50, + 0x7f64017f2b7b3738, 0xd393248a62417fa1, 0x9ff01c0b20a372c5, 0xb0e44abce7e7c220, + 0xcebb9f88d48a815f, 0xdb7df6bd09033886, 0x7844fc82b6fa9091, 0x72d095449863b8ec, + 0xc13e678c89da2c7e, 0x6caf4d5ad231d12f, 0x2e0ab7b5fcf35c49, 0xf410720cb932a70f, + 0xd66ea581f16fce06, 0x175c9f002f57dc98, 0xccbcfd0d32988775, 0xfde4c407d3b0a232, + 0x5db2931ae7e97223, 0x6e07e2173085809f, 0x6e1d1ec0f9cad73c, 0xb2fc251a7f802619, + 0xbc1fc17f04f342de, 0x8de8f21ec658e078, 0x72c0f40cbee53fd6, 0x0678244411fc17a1, + 0x1d5837ca166b9bbd, 0xc8cada003c554345, 0x6a2fe2bfb2e58652, 0xfca9d797a6f7988b, + 0x6699e24ac737948b, 0x69623ffcb05789ba, 0x946429c529d95b75, 0x0d14df0b2a13970f, + 0x593d8592c440dfec, 0x2ee176f3d7e74b94, 0xae003f1da3be9e26, 0x0c7b02c4c0f6764a, + 0x3117e2fa1f632462, 0xf0f23265b6f1eaeb, 0x3111255d9b10c137, 0xc82745e509a00397, + 0xbd1d04037005fea7, 0xe104ab0dd22a9036, 0x51b27ce50851ac7a, 0xb2cb9fb21b471b15, + 0x29d298074c5a3e26, 0x6ebdf2058b737418, 0xc4a974041431b96f, 0x1ec5a30ccb6bdaac, + 0xe818beede9bf4425, 0x4b69b1bce67a5555, 0xf5c35f1eb0d62698, 0xf4509bbd8e99867c, + 0xb17206debd52e1bc, 0x35785668c770b3be, 0xe9343987ff5863bc, 0x2ee768499ac73114, + 0x5132bb3426eeaaf4, 0x471bce2c6833c5ff, 0xbb9a2d5428e6f6f9, 0xd5678943c595792d, + 0xab2a65e7f81e479c, 0xa82407bb23990b31, 0xdae321383984923c, 0x01823bb22648e6f1, + 0xda6e8df4214a8b04, 0x0e172bb88e03d94f, 0x552da6c22e362777, 0x7ce67329fb0e90cb, + 0x7b2d7f287ede7ebf, 0xd44f8222500651bd, 0x4acca1ef58fbb8ab, 0x428ecf058df9656b, + 0xd7e1ec6a8987c185, 0x365be6a54b253246, 0x168849be1e271ee8, 0x6a00f3c4151a8db2, + 0x37602727ca94b33d, 0xf6b50f18504fa9ce, 0x1c10817f6bc872de, 0x4bfe1fe42b0f3638, + 0x135fad4b8ef6143b, 0x1b25ad2bafc25f58, 0x41e37f85cf321f92, 0xfc73f75d9d5b9bea, + 0x9eb3694d1e9cb7e1, 0x601d51f08fa83b90, 0x234a2a9b88366f41, 0x63fe903e16f2c3bf, + 0x1cdbd34fa751c0b0, 0x0ce4fc6747c0558c, 0x51ed72afb8bb49aa, 0x20313ba13ca12c96, + 0x271fa38f9ebd54c1, 0x3696a5ac03a8edde, 0x05602be7df625702, 0x11f1ac73790f7a9f, + 0xa2836c099f0810bd, 0xe5ac2e47caa532fa, 0xd9c000a66d39f681, 0xd93d900e6f3d9d5f, + 0x792c81c65b7900f2, 0x5c5dce790ee20da1, 0x74ff1950edec1aee, 0x71fc85fa1e277d8f, + 0x0e77df17d6546cbc, 0x07debad44816c3b4, 0xbafa721581e92a70, 0x8ab6fbe2ed27bba8, + 0xe83243a20dea304a, 0xaa85a63a84c00a07, 0xde0e79917fc4153a, 0x21bb445e83537896, + 0xeedcac49fc0b433a, 0xffb2926a810ae57a, 0xf724be1f41d28702, 0x79cb95746039bb3b, + 0x5a54fe3742a00900, 0xda4768d64922c04f, 0x420396a84a339dae, 0xa171e26ee5e8724e, + 0x4c8da7c5d289c20a, 0x9ebd79a1a8e94742, 0x39235232b97e9782, 0xb75df0be9bba7d80, + 0x0c1d204dd87d48fc, 0x8f81f3e7177266e8, 0xe4a460b39e78d72b, 0x50b98fa151e65351, + 0xb7cb585c3ee1eddc, 0x11cdad9a76ee1dc4, 0xa38054a78595dc1c, 0x92f09e2ec4978edc, + 0xa8f0061b5efdabaa, 0x04bcc4abc224d230, 0xc58606738e692d46, 0xdd2b27b565952433, + 0x19e6ed1b740beec0, 0xceadd49b2ef9891f, 0x328178c28fe95cad, 0xe5ad4c43afe02848, + 0x03c0cb538cd967c0, 0xec4352526d19a630, 0x4c7e99389d39b031, 0xf65dd05362c2deb6, + 0xd1e70daf6879d28d, 0xbe9f57db6309b265, 0xa4b66f370b872bb7, 0xe26896fbc6ee1fd5, + 0xac705e661bfcf7c5, 0xab4d0d07d7f09940, 
0x976417c06aeb6267, 0x8161c684a6bd468c, + 0xf77b6b9976dc4601, 0xc6489b779a39c12c, 0xb2aa58d5681cea1a, 0x043b1b40f8c3e04c, + 0x681fcbfadc845430, 0xab8896c921ba8def, 0x57aaf172606f37b2, 0xc3735048cd5eb8d7, + 0xa7078b96955631bd, 0xdd6b3543aa187f33, 0xc7103ea4a2a697fd, 0x8d7b95f6ff1f7407, + 0xe44f419e84709530, 0xf340caa9132cbb0a, 0x2ba407283143c66c, 0xe1be240ca636c844, + 0x90d32f2877ac08bc, 0x5d26e6294b2c8673, 0x4a6b2f5b27c87a44, 0x961fb9043f76d34f, + 0x0afee02d8d3c55d2, 0x6228e3f48c42e5dc, 0xc338e69ee6593675, 0x853f74b16efb7bdd, + 0xd062f40bdd22e687, 0x647164b9ab4c4190, 0xf94689f67d598369, 0x8e4b29d87a5012d7, + 0xaf02b8b925656fbd, 0x7a722a767179a630, 0xb5c8afe937a75ace, 0xfdb8e8d02d279372, + 0x887ef700cb25fae1, 0xcfe9bd912f72cabe, 0xb1d4dedc24f978de, 0x517522d38319cc2a, + 0x7dd87b2b36aab798, 0x579c4ff3046b5a04, 0xf5c5975c5028b7a7, 0x7094579d1000ec84, + 0xbc8d5b1ea70a5291, 0x161b2d783be8855c, 0xd26d0b0d6d18279f, 0x0be1945f02a78bd5, + 0xb822a5a9e045415b, 0x2fe9d68b1ccc3562, 0xb2e375960033d14f, 0x26aca04e49b4ff22, + 0x732a81c862112aea, 0x8bd901ed6e4260b8, 0xe839532c561ad5b0, 0x8fb6e4d517a79b12, + 0x0dd37f8c0be9b429, 0xc8ad87ad12f1b1b0, 0xc51f3aa62b90318b, 0x031a7e8b86c1cefc, + 0xa95547af2b70fc76, 0x9cb3615c5a98801e, 0xa387e3c3341d7032, 0xa087ea52a1debaef, + 0x16325ec9a2e6e835, 0x587944a484c585eb, 0xc8879033bde22ecc, 0xa39dbfce709c464a, + 0x7acc010f99208774, 0x98dd2973a096c5ad, 0x26458b51139f198c, 0x2f5d19575e8c4f02, + 0x726643f0d38af352, 0x44d879b6d73e6e94, 0xa68a03885c980abe, 0x06048acd161c40c0, + 0xa4dab8f89d405d28, 0x7120c880cb04be18, 0xa062ace22a1cf0cf, 0x3901a9daf29704f4, + 0xff08f3ed989db30a, 0x6d22b13e874c67e9, 0x80c6f35518d73f4d, 0xc23c2a521aac6f29, + 0x2e708fd83aaa42e0, 0x7fc3780f55f1b0fd, 0xabb3075c98cf87f2, 0xb4df3f40f7c61143, + 0x2a04418098a76d75, 0x0d9eeee9509b2d37, 0x6be8ae51f4b59cdc, 0xe746cc7c00e4a2ab, + 0x785bc6df9cac597c, 0x33cb6620ce8adc48, 0xc1ba30739bffcef7, 0x6d95771f18e503f7, + 0xf7be3ae2e62652ff, 0xc8d82ffd2a73c62b, 0x8725a3ba5b110973, 0x67ed6b9c724757ec, + ], + // seed = 7 + [ + 0xc0272d42c19ff3ae, 0x4694228b43ea043b, 0x5709a6ef8a462841, 0xc9210a1e538805c9, + 0x279b171196113ec2, 0x859b769fc2d9e815, 0x0d5d3125a2bf14d3, 0x22bca1cfefa878ba, + 0x481b6bf58037bd83, 0x4933ba8647728d22, 0xf08c7b6b56f6e1b6, 0x374e8af5a15407c7, + 0xa95c4dc3d2487a5c, 0x9b832808ff11e751, 0xf2048507e9da01d5, 0xa9c576189f544a4a, + 0xf6c2a45b2e9d2b41, 0x9b9874c9f10ecc2f, 0x37d9b5f51f8c149e, 0x93aead54c9de9467, + 0x59cf0b4af262da23, 0xe7e9929af18194b2, 0x9df2644e33eb0178, 0xde4122d6f0671938, + 0xf005786c07f4800b, 0xb1fc9d254b5d1039, 0x0bf1088631f6dd7b, 0x665623f0a4b8f0c7, + 0x60f0113a9187db7c, 0xfd7cceda4f0d23a6, 0x26c01e9d89955940, 0x33afa1dfc0f5a6a0, + 0xeb77daf215e9283c, 0xc7575214bf85edb4, 0xeb0d804bf297e616, 0x84bff4ffd564f747, + 0xc4ac33189246f620, 0x43ef61213ecc1005, 0xcbbb0dea6cd96acd, 0x8ed27abfa8cfcb05, + 0x543b61529cb996b6, 0xa5f987ca41ea5e59, 0x3c50e0ac5254cb7a, 0x4192b0446c06d1e6, + 0x3e86592e21b45388, 0xdb766f06fcc6e51e, 0x0448ee36efe632db, 0x663c9db689253e35, + 0x72e0bd4985331dd4, 0xff501b5bf7d94e74, 0xe911ce758e2113a8, 0xec3a8d03a75a6ba4, + 0xaf6b4b72f56edc83, 0xf284857936c0a391, 0x5ba6feff407d46f4, 0x9d689c26de9d6702, + 0x28c04a9083726b5d, 0x2ccf4a627a029730, 0x7b4719500d4f0c71, 0x76470a9a7da250a8, + 0xcc48409404a1c890, 0xccefbdc7ec9a8055, 0xe0db91bff3cc42d3, 0x0532436426141254, + 0xf2ee9325e6f0ff0b, 0x149c20a5fbb28d9d, 0xe71624cd8d2d14d4, 0x8f01d4dc8cc2dd77, + 0x29cf409b333015b7, 0xba8bebd211884dd1, 0xc3396635e8c8db1d, 0x8ed0f6208d0528b8, + 0x0d90b43fdd0ee334, 0xd73c9a3333a044c7, 
0xa2595cd208dbdc38, 0xae93cb264f940c09, + 0x8e0538d8afb07a97, 0x19115ec881385ba2, 0xa886f9e6a8039c6a, 0xcd5d62147ce3ecac, + 0xaecdf9e0bb4969f7, 0x2ddd631c53dcad10, 0x73ad1c97b3412054, 0xb08915fa2722efc6, + 0x97966047e5067eb0, 0x337f1675ed91445c, 0xb3a833d150b96a0d, 0x5940a98fe35e5e2e, + 0xfd03cc354ed0d8ff, 0x4e65b98291a8644a, 0x14a259f2852a60b2, 0x7648e3478c1e8e5f, + 0xbc0fbef6d9a919b4, 0xbec4302081346cf1, 0x57d2ce7aa1c7c511, 0x234c209d8f4e1ac3, + 0x87cf80cc933ce443, 0x7c262c616931e94e, 0xc5e33b049cf9eddf, 0x1a80790ed03ae51b, + 0xf2e8b9494f7220cf, 0x124cb59c14fff3ff, 0xa8a06cbfdb86ce18, 0x9068ef1f80b37653, + 0x0c55417b8d90338f, 0xcd579a523f6bcd30, 0xa31bfe2476a8d2a9, 0x1f8d142208094223, + 0x332dc40a5203cfad, 0xf8792fe5b2d33b4c, 0x443bd9668bf9461e, 0xc9019db0ace1409e, + 0x781bea919a113e8b, 0xb0f11d866abfbeec, 0xcfe139a60db0c26a, 0x869ab8721e6aa39e, + 0xdb48a4977717837a, 0x588a5ff151065b18, 0xe4a251ea0028864d, 0x7f0e43ba408a77c3, + 0x65f66dd50a536135, 0x6f49e934d9331c3e, 0xb8d742e0f0fa6b09, 0xe4e9b272deca2348, + 0xaee132ff902f773c, 0x43f658f7c2a0c90a, 0x28cb4dbc76cc53ea, 0x7d92253aa99ac39b, + 0x4fea3d832370baab, 0xb29e36936e51d78e, 0xea10778712321064, 0xff4f21f8ef274be2, + 0x84eff18ddfa0933f, 0xd0ec6a9f86c758a0, 0xaf82e5973c431ae0, 0x352023c00c045425, + 0xad34d7bc4a2f8961, 0xbdb4a02a24d4dee0, 0x354a4846d97447cf, 0x331a8b944d5bc19f, + 0x5ce04f8e17909035, 0x6497581bad8f4aab, 0x07c503bba647111e, 0x85f412ba78e1f7ff, + 0x7f3b920fd20f4cff, 0x424e1a9a4ce34e2f, 0x3035e2d62e1b9f0a, 0xef63114bff7b729a, + 0xe86a05889ab6bb60, 0xee0830cf095585a1, 0x4a54f7fa47d9c94b, 0x17daeece9fcb556a, + 0xc506d3f391834c6f, 0xb3f24be362e1af64, 0xc435e4e23608efdd, 0xeeba9caaa4cc1768, + 0x5a71f306daddc22d, 0x18e5205f41eba1a0, 0x7b29b4d1f6610925, 0x065cb65a0258d9a9, + 0x3e5ac8faa9fd1f95, 0x3b362362c1ea0470, 0xce0e4f6434db7a2e, 0xf327341098de52f2, + 0xcfca3b9e2a1992c3, 0x7483bf9401233e41, 0xbafbac531c6f9281, 0x4b52dd71b2c106f8, + 0xdf73b66e50b5a1f7, 0x237aec0202a20283, 0x23dd5be23dffdf2b, 0xea9730731ee122ef, + 0x5cb3f846014fbcd3, 0xc3b21c8ffdce9201, 0x06a99a02f91a8760, 0x721a81fa8fd7b7a3, + 0x6aafcdddc53cbcd8, 0xd03b464005a93bcc, 0x8212edc1b1669dcb, 0x71f4c31364c31bc7, + 0xfeeec0eba8772307, 0x1948d00a13d88cf1, 0x19064fd6d943ada8, 0x4ec8d31722697bfd, + 0x596d9a953a516609, 0xc4cb4bff53507da2, 0x1d59f3c5be36e4ca, 0xe5b4fc5bf6044c9b, + 0x1bb74e052232f735, 0x04e8a0db611ddd5d, 0x8d04eaa009b421bf, 0xa7878ae0ac0e6d58, + 0x28c1030217cab2b3, 0x827943767e56a883, 0x28fce5fa02d22809, 0xb30c322fffc8c58e, + 0x1ca5a6a9f8066c5b, 0xb24db5f1462b2513, 0x02f653b89b7e5f6c, 0xe31f8fb5d5f78eee, + 0x266acc514ed93501, 0x936879d1c6fddcc4, 0xcd51be3636af1952, 0x3fdbb6fc332c78c8, + 0x9eb656379fa73094, 0x056146cc92fa0f96, 0xed6c4f1836c027c3, 0x021e0bb5d2113f2a, + 0x8983e42ec1c626b3, 0x73ea9bc6513ad9c9, 0x0c904903b24f4247, 0xacbac1e6243e2525, + 0x0b1069a0c230fb06, 0x77d709fca3fc1ce5, 0x87ad0f65020947e6, 0x555302641c53f4e6, + 0x65ea87871fa9aaee, 0x58aaf4ecc1067bb4, 0x1a66c48cc4c65b3f, 0xca96aca48b2ea969, + 0xa68eb70bad14de2b, 0x5ccdb3d7e00a6f6e, 0xe178fbfec73fe72f, 0x2b63d6a16b83e890, + 0x32fdb7a5330fbae0, 0x2ab5803c8d1bf32c, 0xda838388c1527c94, 0x16a50bdc4de24acb, + 0xe561301f134c074a, 0xd7ae63d2816b4db1, 0x036aabd4df0dd741, 0xc5e0db8783435b9d, + 0x9c4386cf0a07f3b2, 0x6a72ac1aa56a13a1, 0x299bbdb04bb20a23, 0x138c1018fda16b81, + 0x0e354f0b3bda49df, 0x9f4c295b23127437, 0xd133ceb2bd561341, 0xd8b4bfd5a526ac29, + 0xcdd0a70ddc1c7bbd, 0x81dce595bf572225, 0x1c6f925c05f6efd7, 0x8ae5097553856ea0, + 0x3aabeaeef248f60d, 0xd9005809d19a69e2, 0x2a3a1a314311cc27, 
0x89bb2dc76b2b624a, + 0x50a2a95d0412e289, 0x9def8df564e68581, 0xf49010a9b2e2ea5c, 0x8602ae175d9ff3f0, + 0xbf037e245369a618, 0x8038164365f6e2b5, 0xe2e1f6163b4e8d08, 0x8df9314914f0857e, + ]]; diff --git a/parquet/src/column/chunker/mod.rs b/parquet/src/column/chunker/mod.rs new file mode 100644 index 000000000000..42631e026db4 --- /dev/null +++ b/parquet/src/column/chunker/mod.rs @@ -0,0 +1,40 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Content-defined chunking (CDC) for Parquet data pages. +//! +//! CDC creates data page boundaries based on content rather than fixed sizes, +//! enabling efficient deduplication in content-addressable storage (CAS) systems. +//! See [`CdcOptions`](crate::file::properties::CdcOptions) for configuration. + +mod cdc; +mod cdc_generated; + +pub(crate) use cdc::ContentDefinedChunker; + +/// A chunk of data with level and value offsets for record-shredded nested data. +#[derive(Debug, Clone, Copy)] +pub(crate) struct CdcChunk { + /// The start offset of this chunk inside the given levels. + pub level_offset: usize, + /// The number of levels in this chunk. + pub num_levels: usize, + /// The start index into `non_null_indices` for this chunk. + pub value_offset: usize, + /// The number of `non_null_indices` entries in this chunk. + pub num_values: usize, +} diff --git a/parquet/src/column/mod.rs b/parquet/src/column/mod.rs index 1e534bdd6b77..115c8dd01b80 100644 --- a/parquet/src/column/mod.rs +++ b/parquet/src/column/mod.rs @@ -117,6 +117,8 @@ //! assert_eq!(rep_levels, vec![0, 1, 0, 1, 1]); //! ``` +#[cfg(feature = "arrow")] +pub(crate) mod chunker; pub mod page; #[cfg(feature = "encryption")] pub(crate) mod page_encryption; diff --git a/parquet/src/column/page.rs b/parquet/src/column/page.rs index f18b296c1c65..4cfc07a02883 100644 --- a/parquet/src/column/page.rs +++ b/parquet/src/column/page.rs @@ -406,7 +406,14 @@ pub trait PageReader: Iterator> + Send { /// [(#4327)]: https://github.com/apache/arrow-rs/pull/4327 /// [(#4943)]: https://github.com/apache/arrow-rs/pull/4943 fn at_record_boundary(&mut self) -> Result { - Ok(self.peek_next_page()?.is_none()) + match self.peek_next_page()? { + // Last page in the column chunk - always a record boundary + None => Ok(true), + // A V2 data page is required by the parquet spec to start at a + // record boundary, so the current page ends at one. V2 pages + // are identified by having `num_rows` set in their header. 
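+            // V1 page headers do not carry `num_rows`, so for V1 pages this
+            // conservatively reports no known record boundary.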
+ Some(metadata) => Ok(metadata.num_rows.is_some()), + } } } diff --git a/parquet/src/column/reader.rs b/parquet/src/column/reader.rs index 387a0602a60d..29cb50185a58 100644 --- a/parquet/src/column/reader.rs +++ b/parquet/src/column/reader.rs @@ -1361,4 +1361,135 @@ mod tests { ); } } + + /// Regression test for + /// + /// Reproduces the production scenario: all DataPage v2 pages for a + /// list column (rep_level=1) read without an offset index (i.e. + /// `at_record_boundary` returns false for non-last pages). + /// + /// When a prior operation (here `skip_records(1)`) loads a v2 page, + /// and a subsequent `skip_records` exhausts the remaining levels on + /// that page, the rep level decoder is left with `has_partial=true`. + /// Because `has_record_delimiter` is false, the partial is not + /// flushed during level-based processing. When the next v2 page is + /// then peeked with `num_rows` available, the whole-page-skip + /// shortcut must flush the pending partial first. Otherwise: + /// + /// 1. The skip over-counts (skips N+1 records instead of N), and + /// 2. The stale `has_partial` causes a subsequent `read_records` to + /// produce a "phantom" record with 0 values. + #[test] + fn test_skip_records_v2_page_skip_accounts_for_partial() { + use crate::encodings::levels::LevelEncoder; + + let max_rep_level: i16 = 1; + let max_def_level: i16 = 1; + + // Column descriptor for a list element column (rep=1, def=1) + let primitive_type = SchemaType::primitive_type_builder("element", PhysicalType::INT32) + .with_repetition(Repetition::REQUIRED) + .build() + .unwrap(); + let desc = Arc::new(ColumnDescriptor::new( + Arc::new(primitive_type), + max_def_level, + max_rep_level, + ColumnPath::new(vec!["list".to_string(), "element".to_string()]), + )); + + // Helper: build a DataPage v2 for this list column. + let make_v2_page = + |rep_levels: &[i16], def_levels: &[i16], values: &[i32], num_rows: u32| -> Page { + let mut rep_enc = LevelEncoder::v2(max_rep_level, rep_levels.len()); + rep_enc.put(rep_levels); + let rep_bytes = rep_enc.consume(); + + let mut def_enc = LevelEncoder::v2(max_def_level, def_levels.len()); + def_enc.put(def_levels); + let def_bytes = def_enc.consume(); + + let val_bytes: Vec = values.iter().flat_map(|v| v.to_le_bytes()).collect(); + + let mut buf = Vec::new(); + buf.extend_from_slice(&rep_bytes); + buf.extend_from_slice(&def_bytes); + buf.extend_from_slice(&val_bytes); + + Page::DataPageV2 { + buf: Bytes::from(buf), + num_values: rep_levels.len() as u32, + encoding: Encoding::PLAIN, + num_nulls: 0, + num_rows, + def_levels_byte_len: def_bytes.len() as u32, + rep_levels_byte_len: rep_bytes.len() as u32, + is_compressed: false, + statistics: None, + } + }; + + // All pages are DataPage v2 (matching the production scenario where + // parquet-rs writes only v2 data pages and no offset index is loaded, + // so at_record_boundary() returns false for non-last pages). 
+ + // Page 1 (v2): 2 records × 2 elements = [10,20], [30,40] + let page1 = make_v2_page(&[0, 1, 0, 1], &[1, 1, 1, 1], &[10, 20, 30, 40], 2); + + // Page 2 (v2): 2 records × 2 elements = [50,60], [70,80] + let page2 = make_v2_page(&[0, 1, 0, 1], &[1, 1, 1, 1], &[50, 60, 70, 80], 2); + + // Page 3 (v2): 1 record × 2 elements = [90,100] + let page3 = make_v2_page(&[0, 1], &[1, 1], &[90, 100], 1); + + // 5 records total: [10,20], [30,40], [50,60], [70,80], [90,100] + let pages = VecDeque::from(vec![page1, page2, page3]); + let page_reader = InMemoryPageReader::new(pages); + let column_reader: ColumnReader = get_column_reader(desc, Box::new(page_reader)); + let mut typed_reader = get_typed_column_reader::(column_reader); + + // Step 1 — skip 1 record: + // Peek page 1: num_rows=2, remaining=1 → rows(2) > remaining(1), + // so the page is LOADED (not whole-page-skipped). + // Level-based skip consumes rep levels [0,1] for record [10,20], + // stopping at the 0 that starts record [30,40]. + let skipped = typed_reader.skip_records(1).unwrap(); + assert_eq!(skipped, 1); + + // Step 2 — skip 2 more records ([30,40] and [50,60]): + // Mid-page in page 1 with 2 remaining levels [0,1] for [30,40]. + // skip_rep_levels(2, 2): the leading 0 does NOT act as a record + // delimiter (has_partial=false, idx==0), so count_records returns + // (true, 0, 2) — all levels consumed, has_partial=true, 0 records. + // + // has_record_delimiter is false → no flush at page boundary. + // Page 1 exhausted → peek page 2 (v2, num_rows=2). + // + // With fix: flush_partial → remaining 2→1, page 2 NOT skipped + // (rows=2 > remaining=1). Load page 2, skip 1 record [50,60]. + // + // Without fix: rows(2) <= remaining(2) → page 2 whole-page-skipped, + // over-counting by 1. has_partial stays true (stale from page 1). + let skipped = typed_reader.skip_records(2).unwrap(); + assert_eq!(skipped, 2); + + // Step 3 — read 1 record: + let mut values = Vec::new(); + let mut def_levels = Vec::new(); + let mut rep_levels = Vec::new(); + + let (records, values_read, levels_read) = typed_reader + .read_records(1, Some(&mut def_levels), Some(&mut rep_levels), &mut values) + .unwrap(); + + // Without the fix: (1, 0, 0) — phantom record from stale has_partial; + // the rep=0 on page 3 "completes" the phantom, yielding 0 values. + // With the fix: (1, 2, 2) — correctly reads record [70, 80]. + assert_eq!(records, 1, "should read exactly 1 record"); + assert_eq!(levels_read, 2, "should read 2 levels for the record"); + assert_eq!(values_read, 2, "should read 2 non-null values"); + assert_eq!(values, vec![70, 80], "should contain 4th record's values"); + assert_eq!(rep_levels, vec![0, 1], "rep levels for a 2-element list"); + assert_eq!(def_levels, vec![1, 1], "def levels (all non-null)"); + } } diff --git a/parquet/src/column/writer/mod.rs b/parquet/src/column/writer/mod.rs index c014397f132e..4c3dbabc2132 100644 --- a/parquet/src/column/writer/mod.rs +++ b/parquet/src/column/writer/mod.rs @@ -100,6 +100,15 @@ impl ColumnWriter<'_> { downcast_writer!(self, typed, typed.get_estimated_total_bytes()) } + /// Finalize the currently buffered values as a data page. + /// + /// This is used by content-defined chunking to force a page boundary at + /// content-determined positions. + #[cfg(feature = "arrow")] + pub(crate) fn add_data_page(&mut self) -> Result<()> { + downcast_writer!(self, typed, typed.add_data_page()) + } + /// Close this [`ColumnWriter`], returning the metadata for the column chunk. 
pub fn close(self) -> Result { downcast_writer!(self, typed, typed.close()) @@ -1001,7 +1010,7 @@ impl<'a, E: ColumnValueEncoder> GenericColumnWriter<'a, E> { /// Adds data page. /// Data page is either buffered in case of dictionary encoding or written directly. - fn add_data_page(&mut self) -> Result<()> { + pub(crate) fn add_data_page(&mut self) -> Result<()> { // Extract encoded values let values_data = self.encoder.flush_data_page()?; diff --git a/parquet/src/data_type.rs b/parquet/src/data_type.rs index df5702d1bb00..d8c7b9201389 100644 --- a/parquet/src/data_type.rs +++ b/parquet/src/data_type.rs @@ -1331,10 +1331,10 @@ impl AsRef<[u8]> for FixedLenByteArray { /// Macro to reduce repetition in making type assertions on the physical type against `T` macro_rules! ensure_phys_ty { - ($($ty:pat_param)|+ , $err: literal) => { + ($($ty:pat_param)|+ , $($arg:tt)*) => { match T::get_physical_type() { $($ty => (),)* - _ => panic!($err), + _ => panic!($($arg)*), }; } } diff --git a/parquet/src/encodings/decoding.rs b/parquet/src/encodings/decoding.rs index 58430820a9b6..7da21e6dd091 100644 --- a/parquet/src/encodings/decoding.rs +++ b/parquet/src/encodings/decoding.rs @@ -770,15 +770,48 @@ where // At this point we have read the deltas to `buffer` we now need to offset // these to get back to the original values that were encoded - for v in &mut buffer[read..read + batch_read] { + // + // Optimization: if the bit_width for the miniblock is 0, then we can employ + // a faster decoding method than setting `value[i] = value[i-1] + value[i] + min_delta`. + // Where min_delta is 0 (all values in the miniblock are the same), we can simply + // set all values to `self.last_value`. In the case of non-zero min_delta (values + // in the mini-block form an arithmetic progression) each value can be computed via + // `value[i] = (i + 1) * min_delta + last_value`. In both cases we remove the + // dependence on the preceding value. + // Kudos to @pitrou for the idea https://github.com/apache/arrow/pull/49296 + let min_delta = self.min_delta.as_i64()?; + if bit_width == 0 { + if min_delta == 0 { + buffer[read..read + batch_read].fill(self.last_value); + } else { + // the c++ version multiplies min_delta by the iter index, but doing + // wrapping_mul through T::T was a bit slower. this is still + // faster than before. + let mut delta = self.min_delta; + for v in &mut buffer[read..read + batch_read] { + *v = self.last_value.wrapping_add(&delta); + delta = delta.wrapping_add(&self.min_delta); + } + + self.last_value = buffer[read + batch_read - 1]; + } + } else { // It is OK for deltas to contain "overflowed" values after encoding, // e.g. i64::MAX - i64::MIN, so we use `wrapping_add` to "overflow" again and // restore original value. - *v = v - .wrapping_add(&self.min_delta) - .wrapping_add(&self.last_value); - - self.last_value = *v; + if min_delta == 0 { + for v in &mut buffer[read..read + batch_read] { + *v = v.wrapping_add(&self.last_value); + self.last_value = *v; + } + } else { + for v in &mut buffer[read..read + batch_read] { + *v = v + .wrapping_add(&self.min_delta) + .wrapping_add(&self.last_value); + self.last_value = *v; + } + } } read += batch_read; @@ -840,12 +873,33 @@ where )); } - for v in &mut skip_buffer[0..skip_count] { - *v = v - .wrapping_add(&self.min_delta) - .wrapping_add(&self.last_value); + // see commentary in self.get() above regarding optimizations + let min_delta = self.min_delta.as_i64()?; + if bit_width == 0 { + // if min_delta == 0, there's nothing to do. 
self.last_value is unchanged + if min_delta != 0 { + let mut delta = self.min_delta; + for v in &mut skip_buffer[0..skip_count] { + *v = self.last_value.wrapping_add(&delta); + delta = delta.wrapping_add(&self.min_delta); + } + + self.last_value = skip_buffer[skip_count - 1]; + } + } else if min_delta == 0 { + for v in &mut skip_buffer[0..skip_count] { + *v = v.wrapping_add(&self.last_value); + + self.last_value = *v; + } + } else { + for v in &mut skip_buffer[0..skip_count] { + *v = v + .wrapping_add(&self.min_delta) + .wrapping_add(&self.last_value); - self.last_value = *v; + self.last_value = *v; + } } skip += mini_block_should_skip; @@ -1802,6 +1856,76 @@ mod tests { ); } + #[test] + fn test_delta_bit_packed_int32_single_value_large() { + let block_data = vec![3; 10240]; + test_delta_bit_packed_decode::(vec![block_data]); + } + + #[test] + fn test_delta_bit_packed_int32_single_value_skip_large() { + let block_data = vec![3; 10240]; + test_skip::(block_data.clone(), Encoding::DELTA_BINARY_PACKED, 50); + test_skip::(block_data, Encoding::DELTA_BINARY_PACKED, 5000); + } + + #[test] + fn test_delta_bit_packed_int32_increasing_value_large() { + let block_data = (0i32..10240).collect(); + test_delta_bit_packed_decode::(vec![block_data]); + } + + #[test] + fn test_delta_bit_packed_int32_increasing_value_skip_large() { + let block_data = (0i32..10240).collect::>(); + test_skip::(block_data.clone(), Encoding::DELTA_BINARY_PACKED, 50); + test_skip::(block_data, Encoding::DELTA_BINARY_PACKED, 5000); + } + + #[test] + fn test_delta_bit_packed_int32_stepped_value_large() { + let block_data = (0i32..10240).map(|i| i / 2).collect(); + test_delta_bit_packed_decode::(vec![block_data]); + } + + #[test] + fn test_delta_bit_packed_int32_stepped_value_skip_large() { + let block_data = (0i32..10240).map(|i| i / 2).collect::>(); + test_skip::(block_data.clone(), Encoding::DELTA_BINARY_PACKED, 50); + test_skip::(block_data, Encoding::DELTA_BINARY_PACKED, 5000); + } + + #[test] + fn test_delta_bit_packed_int32_mixed_large() { + // should be enough for 4 mini-blocks plus a little so we get some + // mixed mini-blocks + const BLOCK_SIZE: i32 = 133; + let block1_data = (0..BLOCK_SIZE).map(|i| (i * 7) % 11).collect(); + let block2_data = vec![3; BLOCK_SIZE as usize]; + let block3_data = (0..BLOCK_SIZE).map(|i| (i * 5) % 13).collect(); + let block4_data = (0..BLOCK_SIZE).collect(); + let block5_data = (0..BLOCK_SIZE).map(|i| (i * 3) % 17).collect(); + test_delta_bit_packed_decode::(vec![ + block1_data, + block2_data, + block3_data, + block4_data, + block5_data, + ]); + } + + #[test] + fn test_delta_bit_packed_int64_single_value_large() { + let block_data = vec![5; 10240]; + test_delta_bit_packed_decode::(vec![block_data]); + } + + #[test] + fn test_delta_bit_packed_int64_increasing_value_large() { + let block_data = (0i64..10240).collect(); + test_delta_bit_packed_decode::(vec![block_data]); + } + #[test] fn test_delta_byte_array_same_arrays() { let data = vec![ diff --git a/parquet/src/encodings/encoding/mod.rs b/parquet/src/encodings/encoding/mod.rs index e5e74ac53fa7..eeabcf4ba5ce 100644 --- a/parquet/src/encodings/encoding/mod.rs +++ b/parquet/src/encodings/encoding/mod.rs @@ -522,20 +522,18 @@ trait DeltaBitPackEncoderConversion { fn subtract_u64(&self, left: i64, right: i64) -> u64; } +const DELTA_BIT_PACK_TYPE_ERROR: &str = + "DeltaBitPackDecoder only supports Int32Type, UInt32Type, Int64Type, and UInt64Type"; + impl DeltaBitPackEncoderConversion for DeltaBitPackEncoder { #[inline] fn assert_supported_type() 
{ - ensure_phys_ty!( - Type::INT32 | Type::INT64, - "DeltaBitPackDecoder only supports Int32Type and Int64Type" - ); + ensure_phys_ty!(Type::INT32 | Type::INT64, "{}", DELTA_BIT_PACK_TYPE_ERROR); } #[inline] fn as_i64(&self, values: &[T::T], index: usize) -> i64 { - values[index] - .as_i64() - .expect("DeltaBitPackDecoder only supports Int32Type and Int64Type") + values[index].as_i64().expect(DELTA_BIT_PACK_TYPE_ERROR) } #[inline] @@ -544,7 +542,7 @@ impl DeltaBitPackEncoderConversion for DeltaBitPackEncoder { match T::get_physical_type() { Type::INT32 => (left as i32).wrapping_sub(right as i32) as i64, Type::INT64 => left.wrapping_sub(right), - _ => panic!("DeltaBitPackDecoder only supports Int32Type and Int64Type"), + _ => panic!("{}", DELTA_BIT_PACK_TYPE_ERROR), } } @@ -554,7 +552,7 @@ impl DeltaBitPackEncoderConversion for DeltaBitPackEncoder { // Conversion of i32 -> u32 -> u64 is to avoid non-zero left most bytes in int repr Type::INT32 => (left as i32).wrapping_sub(right as i32) as u32 as u64, Type::INT64 => left.wrapping_sub(right) as u64, - _ => panic!("DeltaBitPackDecoder only supports Int32Type and Int64Type"), + _ => panic!("{}", DELTA_BIT_PACK_TYPE_ERROR), } } } diff --git a/parquet/src/file/metadata/push_decoder.rs b/parquet/src/file/metadata/push_decoder.rs index abc788426260..e322525b7193 100644 --- a/parquet/src/file/metadata/push_decoder.rs +++ b/parquet/src/file/metadata/push_decoder.rs @@ -308,7 +308,7 @@ impl ParquetMetaDataPushDecoder { #[cfg(feature = "encryption")] /// Provide decryption properties for decoding encrypted Parquet files - pub(crate) fn with_file_decryption_properties( + pub fn with_file_decryption_properties( mut self, file_decryption_properties: Option>, ) -> Self { diff --git a/parquet/src/file/metadata/thrift/mod.rs b/parquet/src/file/metadata/thrift/mod.rs index ddb5aa16b068..88cb96f35555 100644 --- a/parquet/src/file/metadata/thrift/mod.rs +++ b/parquet/src/file/metadata/thrift/mod.rs @@ -192,20 +192,19 @@ fn convert_stats( use crate::file::statistics::Statistics as FStatistics; Ok(match thrift_stats { Some(stats) => { - // Number of nulls recorded, when it is not available, we just mark it as 0. - // TODO this should be `None` if there is no information about NULLS. - // see https://github.com/apache/arrow-rs/pull/6216/files - let null_count = stats.null_count.unwrap_or(0); - - if null_count < 0 { - return Err(general_err!( - "Statistics null count is negative {}", - null_count - )); - } - // Generic null count. - let null_count = Some(null_count as u64); + let null_count = stats + .null_count + .map(|null_count| { + if null_count < 0 { + return Err(general_err!( + "Statistics null count is negative {}", + null_count + )); + } + Ok(null_count as u64) + }) + .transpose()?; // Generic distinct count (count of distinct values occurring) let distinct_count = stats.distinct_count.map(|value| value as u64); // Whether or not statistics use deprecated min/max fields. 
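The statistics hunks in this diff (here and in `parquet/src/file/statistics.rs` below) replace the old `unwrap_or(0)` default with an `Option::map` + `transpose` pipeline, so a missing null count is preserved as `None` while a negative count still produces an error. A minimal, self-contained sketch of that pattern — `convert_null_count` is a hypothetical stand-in, not the library's actual function:

```rust
/// Hypothetical stand-in for the conversion in `convert_stats` /
/// `from_thrift_page_stats`: a missing count stays `None`, a negative
/// count errors, and a valid count is widened to `u64`.
fn convert_null_count(raw: Option<i64>) -> Result<Option<u64>, String> {
    raw.map(|n| {
        if n < 0 {
            return Err(format!("Statistics null count is negative {n}"));
        }
        Ok(n as u64)
    })
    // Option<Result<u64, _>> -> Result<Option<u64>, _>
    .transpose()
}

fn main() {
    assert_eq!(convert_null_count(None), Ok(None)); // missing stays missing
    assert_eq!(convert_null_count(Some(3)), Ok(Some(3)));
    assert!(convert_null_count(Some(-1)).is_err());
}
```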
@@ -1722,6 +1721,7 @@ write_thrift_field!(RustBoundingBox, FieldType::Struct); #[cfg(test)] pub(crate) mod tests { + use crate::basic::Type as PhysicalType; use crate::errors::Result; use crate::file::metadata::thrift::{BoundingBox, SchemaElement, write_schema}; use crate::file::metadata::{ColumnChunkMetaData, ParquetMetaDataOptions, RowGroupMetaData}; @@ -1730,7 +1730,8 @@ pub(crate) mod tests { ElementType, ThriftCompactOutputProtocol, ThriftSliceInputProtocol, read_thrift_vec, }; use crate::schema::types::{ - ColumnDescriptor, SchemaDescriptor, TypePtr, num_nodes, parquet_schema_from_array, + ColumnDescriptor, ColumnPath, SchemaDescriptor, TypePtr, num_nodes, + parquet_schema_from_array, }; use std::sync::Arc; @@ -1828,4 +1829,48 @@ pub(crate) mod tests { mmax: Some(42.0.into()), }); } + + #[test] + fn test_convert_stats_preserves_missing_null_count() { + let primitive = + crate::schema::types::Type::primitive_type_builder("col", PhysicalType::INT32) + .build() + .unwrap(); + let column_descr = Arc::new(ColumnDescriptor::new( + Arc::new(primitive), + 0, + 0, + ColumnPath::new(vec![]), + )); + + let none_null_count = super::Statistics { + max: None, + min: None, + null_count: None, + distinct_count: None, + max_value: None, + min_value: None, + is_max_value_exact: None, + is_min_value_exact: None, + }; + let decoded_none = super::convert_stats(&column_descr, Some(none_null_count)) + .unwrap() + .unwrap(); + assert_eq!(decoded_none.null_count_opt(), None); + + let zero_null_count = super::Statistics { + max: None, + min: None, + null_count: Some(0), + distinct_count: None, + max_value: None, + min_value: None, + is_max_value_exact: None, + is_min_value_exact: None, + }; + let decoded_zero = super::convert_stats(&column_descr, Some(zero_null_count)) + .unwrap() + .unwrap(); + assert_eq!(decoded_zero.null_count_opt(), Some(0)); + } } diff --git a/parquet/src/file/properties.rs b/parquet/src/file/properties.rs index ae21de304404..640a7a075d2f 100644 --- a/parquet/src/file/properties.rs +++ b/parquet/src/file/properties.rs @@ -61,6 +61,64 @@ pub const DEFAULT_STATISTICS_TRUNCATE_LENGTH: Option = Some(64); pub const DEFAULT_OFFSET_INDEX_DISABLED: bool = false; /// Default values for [`WriterProperties::coerce_types`] pub const DEFAULT_COERCE_TYPES: bool = false; +/// Default minimum chunk size for content-defined chunking: 256 KiB. +pub const DEFAULT_CDC_MIN_CHUNK_SIZE: usize = 256 * 1024; +/// Default maximum chunk size for content-defined chunking: 1024 KiB. +pub const DEFAULT_CDC_MAX_CHUNK_SIZE: usize = 1024 * 1024; +/// Default normalization level for content-defined chunking. +pub const DEFAULT_CDC_NORM_LEVEL: i32 = 0; + +/// EXPERIMENTAL: Options for content-defined chunking (CDC). +/// +/// Content-defined chunking is an experimental feature that optimizes parquet +/// files for content addressable storage (CAS) systems by writing data pages +/// according to content-defined chunk boundaries. This allows for more +/// efficient deduplication of data across files, hence more efficient network +/// transfers and storage. +/// +/// Each content-defined chunk is written as a separate parquet data page. The +/// following options control the chunks' size and the chunking process. Note +/// that the chunk size is calculated based on the logical value of the data, +/// before any encoding or compression is applied. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct CdcOptions { + /// Minimum chunk size in bytes, default is 256 KiB. 
+ /// The rolling hash will not be updated until this size is reached for each chunk. + /// Note that all data sent through the hash function is counted towards the chunk + /// size, including definition and repetition levels if present. + pub min_chunk_size: usize, + /// Maximum chunk size in bytes, default is 1024 KiB. + /// The chunker will create a new chunk whenever the chunk size exceeds this value. + /// Note that the parquet writer has a related [`data_page_size_limit`] property that + /// controls the maximum size of a parquet data page after encoding. While setting + /// `data_page_size_limit` to a smaller value than `max_chunk_size` doesn't affect + /// the chunking effectiveness, it results in more small parquet data pages. + /// + /// [`data_page_size_limit`]: WriterPropertiesBuilder::set_data_page_size_limit + pub max_chunk_size: usize, + /// Number of bit adjustment to the gearhash mask in order to center the chunk size + /// around the average size more aggressively, default is 0. + /// Increasing the normalization level increases the probability of finding a chunk, + /// improving the deduplication ratio, but also increasing the number of small chunks + /// resulting in many small parquet data pages. The default value provides a good + /// balance between deduplication ratio and fragmentation. + /// Use norm_level=1 or norm_level=2 to reach a higher deduplication ratio at the + /// expense of fragmentation. Negative values can also be used to reduce the + /// probability of finding a chunk, resulting in larger chunks and fewer data pages. + /// Note that values outside [-3, 3] are not recommended, prefer using the default + /// value of 0 for most use cases. + pub norm_level: i32, +} + +impl Default for CdcOptions { + fn default() -> Self { + Self { + min_chunk_size: DEFAULT_CDC_MIN_CHUNK_SIZE, + max_chunk_size: DEFAULT_CDC_MAX_CHUNK_SIZE, + norm_level: DEFAULT_CDC_NORM_LEVEL, + } + } +} /// Parquet writer version. /// @@ -168,6 +226,7 @@ pub struct WriterProperties { column_index_truncate_length: Option, statistics_truncate_length: Option, coerce_types: bool, + content_defined_chunking: Option, #[cfg(feature = "encryption")] pub(crate) file_encryption_properties: Option>, } @@ -364,6 +423,13 @@ impl WriterProperties { self.coerce_types } + /// EXPERIMENTAL: Returns content-defined chunking options, or `None` if CDC is disabled. + /// + /// For more details see [`WriterPropertiesBuilder::set_content_defined_chunking`] + pub fn content_defined_chunking(&self) -> Option<&CdcOptions> { + self.content_defined_chunking.as_ref() + } + /// Returns encoding for a data page, when dictionary encoding is enabled. /// /// This is not configurable. 
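To tie the new pieces together, here is a hedged usage sketch built only from the APIs added in this diff (`CdcOptions`, `set_content_defined_chunking`, and the `content_defined_chunking` accessor); the chunk sizes and `norm_level` are illustrative choices, not recommendations:

```rust
use parquet::file::properties::{CdcOptions, WriterProperties};

fn main() {
    // Illustrative values: 512 KiB..2 MiB chunks with slightly more
    // aggressive normalization. Note the builder panics if
    // min_chunk_size == 0 or max_chunk_size <= min_chunk_size.
    let props = WriterProperties::builder()
        .set_content_defined_chunking(Some(CdcOptions {
            min_chunk_size: 512 * 1024,
            max_chunk_size: 2 * 1024 * 1024,
            norm_level: 1,
        }))
        .build();

    // The accessor returns `None` when CDC is disabled (the default).
    assert!(props.content_defined_chunking().is_some());
}
```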
@@ -487,6 +553,7 @@ pub struct WriterPropertiesBuilder { column_index_truncate_length: Option, statistics_truncate_length: Option, coerce_types: bool, + content_defined_chunking: Option, #[cfg(feature = "encryption")] file_encryption_properties: Option>, } @@ -510,6 +577,7 @@ impl Default for WriterPropertiesBuilder { column_index_truncate_length: DEFAULT_COLUMN_INDEX_TRUNCATE_LENGTH, statistics_truncate_length: DEFAULT_STATISTICS_TRUNCATE_LENGTH, coerce_types: DEFAULT_COERCE_TYPES, + content_defined_chunking: None, #[cfg(feature = "encryption")] file_encryption_properties: None, } @@ -535,6 +603,7 @@ impl WriterPropertiesBuilder { column_index_truncate_length: self.column_index_truncate_length, statistics_truncate_length: self.statistics_truncate_length, coerce_types: self.coerce_types, + content_defined_chunking: self.content_defined_chunking, #[cfg(feature = "encryption")] file_encryption_properties: self.file_encryption_properties, } @@ -750,6 +819,37 @@ impl WriterPropertiesBuilder { self } + /// EXPERIMENTAL: Sets content-defined chunking options, or disables CDC with `None`. + /// + /// When enabled, data page boundaries are determined by a rolling hash of the + /// column values, so unchanged data produces identical byte sequences across + /// file versions. This enables efficient deduplication on content-addressable + /// storage systems. + /// + /// Only supported through the Arrow writer interface ([`ArrowWriter`]). + /// + /// # Panics + /// + /// Panics if `min_chunk_size == 0` or `max_chunk_size <= min_chunk_size`. + /// + /// [`ArrowWriter`]: crate::arrow::arrow_writer::ArrowWriter + pub fn set_content_defined_chunking(mut self, options: Option) -> Self { + if let Some(ref options) = options { + assert!( + options.min_chunk_size > 0, + "min_chunk_size must be positive" + ); + assert!( + options.max_chunk_size > options.min_chunk_size, + "max_chunk_size ({}) must be greater than min_chunk_size ({})", + options.max_chunk_size, + options.min_chunk_size + ); + } + self.content_defined_chunking = options; + self + } + /// Sets FileEncryptionProperties (defaults to `None`) #[cfg(feature = "encryption")] pub fn with_file_encryption_properties( @@ -1033,6 +1133,7 @@ impl From for WriterPropertiesBuilder { column_index_truncate_length: props.column_index_truncate_length, statistics_truncate_length: props.statistics_truncate_length, coerce_types: props.coerce_types, + content_defined_chunking: props.content_defined_chunking, #[cfg(feature = "encryption")] file_encryption_properties: props.file_encryption_properties, } @@ -1763,4 +1864,18 @@ mod tests { } } } + + #[test] + fn test_cdc_options_equality() { + let opts = CdcOptions::default(); + assert_eq!(opts, CdcOptions::default()); + + let custom = CdcOptions { + min_chunk_size: 1024, + max_chunk_size: 8192, + norm_level: 1, + }; + assert_eq!(custom, custom); + assert_ne!(opts, custom); + } } diff --git a/parquet/src/file/serialized_reader.rs b/parquet/src/file/serialized_reader.rs index b3b6383f78bb..254ccb779a4a 100644 --- a/parquet/src/file/serialized_reader.rs +++ b/parquet/src/file/serialized_reader.rs @@ -1158,7 +1158,12 @@ impl PageReader for SerializedPageReader { fn at_record_boundary(&mut self) -> Result { match &mut self.state { - SerializedPageReaderState::Values { .. } => Ok(self.peek_next_page()?.is_none()), + SerializedPageReaderState::Values { .. } => match self.peek_next_page()? { + None => Ok(true), + // V2 data pages must start at record boundaries per the parquet + // spec, so the current page ends at one. 
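+                // As in the default `PageReader::at_record_boundary` above,
+                // V1 pages (no `num_rows` in the header) yield `false`.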
+ Some(metadata) => Ok(metadata.num_rows.is_some()), + }, SerializedPageReaderState::Pages { .. } => Ok(true), } } diff --git a/parquet/src/file/statistics.rs b/parquet/src/file/statistics.rs index a813e82d13f2..9682fd54b8df 100644 --- a/parquet/src/file/statistics.rs +++ b/parquet/src/file/statistics.rs @@ -125,19 +125,18 @@ pub(crate) fn from_thrift_page_stats( ) -> Result> { Ok(match thrift_stats { Some(stats) => { - // Number of nulls recorded, when it is not available, we just mark it as 0. - // TODO this should be `None` if there is no information about NULLS. - // see https://github.com/apache/arrow-rs/pull/6216/files - let null_count = stats.null_count.unwrap_or(0); - - if null_count < 0 { - return Err(ParquetError::General(format!( - "Statistics null count is negative {null_count}", - ))); - } - // Generic null count. - let null_count = Some(null_count as u64); + let null_count = stats + .null_count + .map(|null_count| { + if null_count < 0 { + return Err(ParquetError::General(format!( + "Statistics null count is negative {null_count}", + ))); + } + Ok(null_count as u64) + }) + .transpose()?; // Generic distinct count (count of distinct values occurring) let distinct_count = stats.distinct_count.map(|value| value as u64); // Whether or not statistics use deprecated min/max fields. @@ -431,9 +430,20 @@ impl Statistics { /// Returns number of null values for the column, if known. /// Note that this includes all nulls when column is part of the complex type. /// - /// Note this API returns Some(0) even if the null count was not present - /// in the statistics. - /// See + /// Note: Versions of this library prior to `58.1.0` returned `0` if the null count + /// was not available. This method now returns `None` in that case. + /// + /// Also, versions of this library prior to `53.1.0` did not store a null count + /// statistic when the null count was `0`. + /// + /// It is unsound to assume that missing nullcount stats mean the column contains no nulls, + /// but code that depends on the old behavior can restore it by defaulting to zero: + /// + /// ```no_run + /// # use parquet::file::statistics::Statistics; + /// # let statistics: Statistics = todo!(); + /// let null_count = statistics.null_count_opt().unwrap_or(0); + /// ``` pub fn null_count_opt(&self) -> Option { statistics_enum_func![self, null_count_opt] } @@ -1064,21 +1074,7 @@ mod tests { let round_tripped = from_thrift_page_stats(Type::BOOLEAN, Some(thrift_stats)) .unwrap() .unwrap(); - // TODO: remove branch when we no longer support assuming null_count==None in the thrift - // means null_count = Some(0) - if null_count.is_none() { - assert_ne!(round_tripped, statistics); - assert!(round_tripped.null_count_opt().is_some()); - assert_eq!(round_tripped.null_count_opt(), Some(0)); - assert_eq!(round_tripped.min_bytes_opt(), statistics.min_bytes_opt()); - assert_eq!(round_tripped.max_bytes_opt(), statistics.max_bytes_opt()); - assert_eq!( - round_tripped.distinct_count_opt(), - statistics.distinct_count_opt() - ); - } else { - assert_eq!(round_tripped, statistics); - } + assert_eq!(round_tripped, statistics); } fn make_bool_stats(distinct_count: Option, null_count: Option) -> Statistics { diff --git a/parquet/src/lib.rs b/parquet/src/lib.rs index 98106a2c1059..916892fafeae 100644 --- a/parquet/src/lib.rs +++ b/parquet/src/lib.rs @@ -67,6 +67,28 @@ //! * [`ArrowColumnWriter`] for writing using multiple threads, //! * [`RowFilter`] to apply filters during decode //! +//! ### EXPERIMENTAL: Content-Defined Chunking +//! +//! 
[`ArrowWriter`] supports content-defined chunking (CDC), which creates data page +//! boundaries based on content rather than fixed sizes. CDC enables efficient +//! deduplication in content-addressable storage (CAS) systems: when the same data +//! appears in successive file versions, it will produce identical byte sequences that +//! CAS backends can deduplicate. +//! +//! Enable CDC via [`WriterProperties`]: +//! +//! ```rust +//! # use parquet::file::properties::{WriterProperties, CdcOptions}; +//! let props = WriterProperties::builder() +//! .set_content_defined_chunking(Some(CdcOptions::default())) +//! .build(); +//! ``` +//! +//! See [`CdcOptions`] for chunk size and normalization parameters. +//! +//! [`WriterProperties`]: file::properties::WriterProperties +//! [`CdcOptions`]: file::properties::CdcOptions +//! //! [`ArrowWriter`]: arrow::arrow_writer::ArrowWriter //! [`ParquetRecordBatchReaderBuilder`]: arrow::arrow_reader::ParquetRecordBatchReaderBuilder //! [`ParquetPushDecoder`]: arrow::push_decoder::ParquetPushDecoder diff --git a/parquet/src/schema/types.rs b/parquet/src/schema/types.rs index 85f3ed48972c..2925557e7b86 100644 --- a/parquet/src/schema/types.rs +++ b/parquet/src/schema/types.rs @@ -853,6 +853,9 @@ pub struct ColumnDescriptor { /// The maximum repetition level for this column max_rep_level: i16, + /// The definition level at the nearest REPEATED ancestor, or 0 if none. + repeated_ancestor_def_level: i16, + /// The path of this column. For instance, "a.b.c.d". path: ColumnPath, } @@ -872,11 +875,22 @@ impl ColumnDescriptor { max_def_level: i16, max_rep_level: i16, path: ColumnPath, + ) -> Self { + Self::new_with_repeated_ancestor(primitive_type, max_def_level, max_rep_level, path, 0) + } + + pub(crate) fn new_with_repeated_ancestor( + primitive_type: TypePtr, + max_def_level: i16, + max_rep_level: i16, + path: ColumnPath, + repeated_ancestor_def_level: i16, ) -> Self { Self { primitive_type, max_def_level, max_rep_level, + repeated_ancestor_def_level, path, } } @@ -893,6 +907,12 @@ impl ColumnDescriptor { self.max_rep_level } + /// Returns the definition level at the nearest REPEATED ancestor, or 0 if none. + #[inline] + pub fn repeated_ancestor_def_level(&self) -> i16 { + self.repeated_ancestor_def_level + } + /// Returns [`ColumnPath`] for this column. pub fn path(&self) -> &ColumnPath { &self.path @@ -1069,7 +1089,16 @@ impl SchemaDescriptor { let mut path = Vec::with_capacity(INIT_SCHEMA_DEPTH); for (root_idx, f) in tp.get_fields().iter().enumerate() { path.clear(); - build_tree(f, root_idx, 0, 0, &mut leaves, &mut leaf_to_base, &mut path); + build_tree( + f, + root_idx, + 0, + 0, + 0, + &mut leaves, + &mut leaf_to_base, + &mut path, + ); } Self { @@ -1191,11 +1220,13 @@ fn count_leaves(tp: &TypePtr, n_leaves: &mut usize) { } } +#[allow(clippy::too_many_arguments)] fn build_tree<'a>( tp: &'a TypePtr, root_idx: usize, mut max_rep_level: i16, mut max_def_level: i16, + mut repeated_ancestor_def_level: i16, leaves: &mut Vec, leaf_to_base: &mut Vec, path_so_far: &mut Vec<&'a str>, @@ -1210,6 +1241,7 @@ fn build_tree<'a>( Repetition::REPEATED => { max_def_level += 1; max_rep_level += 1; + repeated_ancestor_def_level = max_def_level; } _ => {} } @@ -1218,12 +1250,14 @@ fn build_tree<'a>( Type::PrimitiveType { .. 
} => { let mut path: Vec = vec![]; path.extend(path_so_far.iter().copied().map(String::from)); - leaves.push(Arc::new(ColumnDescriptor::new( + let desc = ColumnDescriptor::new_with_repeated_ancestor( tp.clone(), max_def_level, max_rep_level, ColumnPath::new(path), - ))); + repeated_ancestor_def_level, + ); + leaves.push(Arc::new(desc)); leaf_to_base.push(root_idx); } Type::GroupType { fields, .. } => { @@ -1233,6 +1267,7 @@ fn build_tree<'a>( root_idx, max_rep_level, max_def_level, + repeated_ancestor_def_level, leaves, leaf_to_base, path_so_far, @@ -1941,6 +1976,122 @@ mod tests { assert_eq!(descr.column(3).max_rep_level(), 1); } + #[test] + fn test_schema_build_tree_repeated_ancestor_def_level() { + // Flat columns: no REPEATED ancestor → repeated_ancestor_def_level = 0 + let message_type = " + message m { + REQUIRED INT32 a; + OPTIONAL INT32 b; + OPTIONAL group s { + OPTIONAL INT32 x; + } + } + "; + let schema = parse_message_type(message_type).expect("should parse schema"); + let descr = SchemaDescriptor::new(Arc::new(schema)); + assert_eq!(descr.column(0).repeated_ancestor_def_level(), 0); // a + assert_eq!(descr.column(1).repeated_ancestor_def_level(), 0); // b + assert_eq!(descr.column(2).repeated_ancestor_def_level(), 0); // s.x + + // Standard list: OPTIONAL outer, REPEATED group, OPTIONAL element + // repeated_ancestor_def_level is the def_level at the REPEATED group (= 2) + let message_type = " + message m { + OPTIONAL group c (LIST) { + REPEATED group list { + OPTIONAL INT32 element; + } + } + } + "; + let schema = parse_message_type(message_type).expect("should parse schema"); + let descr = SchemaDescriptor::new(Arc::new(schema)); + // c(optional)=1, list(repeated)=2, element(optional)=3 + assert_eq!(descr.column(0).max_def_level(), 3); + assert_eq!(descr.column(0).max_rep_level(), 1); + assert_eq!(descr.column(0).repeated_ancestor_def_level(), 2); + + // Required list: REQUIRED outer, REPEATED group, REQUIRED element + // No OPTIONAL nodes between REPEATED and leaf, so repeated_ancestor_def_level == max_def_level + let message_type = " + message m { + REQUIRED group c (LIST) { + REPEATED group list { + REQUIRED INT32 element; + } + } + } + "; + let schema = parse_message_type(message_type).expect("should parse schema"); + let descr = SchemaDescriptor::new(Arc::new(schema)); + // list(repeated)=1, element(required)=1 + assert_eq!(descr.column(0).max_def_level(), 1); + assert_eq!(descr.column(0).max_rep_level(), 1); + assert_eq!(descr.column(0).repeated_ancestor_def_level(), 1); + + // Nested lists: innermost REPEATED wins + let message_type = " + message m { + OPTIONAL group outer (LIST) { + REPEATED group list { + OPTIONAL group inner (LIST) { + REPEATED group list2 { + OPTIONAL INT32 element; + } + } + } + } + } + "; + let schema = parse_message_type(message_type).expect("should parse schema"); + let descr = SchemaDescriptor::new(Arc::new(schema)); + // outer(opt)=1, list(rep)=2, inner(opt)=3, list2(rep)=4, element(opt)=5 + assert_eq!(descr.column(0).max_def_level(), 5); + assert_eq!(descr.column(0).max_rep_level(), 2); + assert_eq!(descr.column(0).repeated_ancestor_def_level(), 4); + + // Struct inside list: all sibling leaves share the same repeated_ancestor_def_level + let message_type = " + message m { + OPTIONAL group bag (LIST) { + REPEATED group list { + REQUIRED group item { + OPTIONAL INT32 x; + REQUIRED INT32 y; + } + } + } + } + "; + let schema = parse_message_type(message_type).expect("should parse schema"); + let descr = SchemaDescriptor::new(Arc::new(schema)); 
diff --git a/parquet/src/util/bit_util.rs b/parquet/src/util/bit_util.rs
index 3a26603fabc4..262564825864 100644
--- a/parquet/src/util/bit_util.rs
+++ b/parquet/src/util/bit_util.rs
@@ -659,9 +659,15 @@ impl BitReader {
     ///
     /// Returns `None` if there's not enough bytes in the stream. `Some` otherwise.
     pub fn get_vlq_int(&mut self) -> Option<i64> {
+        // Align to byte boundary once, then read bytes directly
+        self.byte_offset = self.get_byte_offset();
+        self.bit_offset = 0;
+
+        let buf = &self.buffer[self.byte_offset..];
         let mut shift = 0;
         let mut v: i64 = 0;
-        while let Some(byte) = self.get_aligned::<u8>(1) {
+
+        for (i, &byte) in buf.iter().enumerate() {
             v |= ((byte & 0x7F) as i64) << shift;
             shift += 7;
             assert!(
@@ -669,6 +675,7 @@
                 "Num of bytes exceed MAX_VLQ_BYTE_LEN ({MAX_VLQ_BYTE_LEN})"
             );
             if byte & 0x80 == 0 {
+                self.byte_offset += i + 1;
                 return Some(v);
             }
         }
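The rewritten `get_vlq_int` aligns the reader to a byte boundary once and then scans raw bytes, rather than paying the `get_aligned` bookkeeping on every byte. The loop itself is plain unsigned LEB128/VLQ decoding: each byte contributes 7 payload bits, and a clear high bit terminates the value. A standalone sketch of the same decode follows; `decode_vlq` is a hypothetical helper, not the crate's `BitReader`.

```rust
const MAX_VLQ_BYTES: usize = 10; // ceil(64 / 7): an i64 needs at most 10 bytes

/// Decode one VLQ integer from `buf`, returning the value and the number
/// of bytes consumed, or `None` if the buffer ends (or the encoding is
/// longer than MAX_VLQ_BYTES) before a terminating byte appears.
fn decode_vlq(buf: &[u8]) -> Option<(i64, usize)> {
    let mut v: i64 = 0;
    for (i, &byte) in buf.iter().take(MAX_VLQ_BYTES).enumerate() {
        // Low 7 bits carry payload; the high bit says "more bytes follow"
        v |= ((byte & 0x7F) as i64) << (7 * i);
        if byte & 0x80 == 0 {
            return Some((v, i + 1));
        }
    }
    None
}

fn main() {
    // Classic LEB128 example: 0x96 0x01 encodes 150 in two bytes
    assert_eq!(decode_vlq(&[0x96, 0x01]), Some((150, 2)));
    // Truncated input: continuation bit set but no next byte
    assert_eq!(decode_vlq(&[0x96]), None);
}
```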
diff --git a/parquet/tests/arrow_reader/io/async_reader.rs b/parquet/tests/arrow_reader/io/async_reader.rs
index 8022335da0ef..db06dda8ee89 100644
--- a/parquet/tests/arrow_reader/io/async_reader.rs
+++ b/parquet/tests/arrow_reader/io/async_reader.rs
@@ -275,9 +275,7 @@ async fn test_read_multiple_row_filter() {
         "Read Multi:",
         " Row Group 1, column 'a': MultiPage(dictionary_page: true, data_pages: [0, 1]) (1856 bytes, 1 requests) [data]",
         "Read Multi:",
-        " Row Group 1, column 'b': DictionaryPage (1617 bytes, 1 requests) [data]",
-        " Row Group 1, column 'b': DataPage(0) (113 bytes , 1 requests) [data]",
-        " Row Group 1, column 'b': DataPage(1) (126 bytes , 1 requests) [data]",
+        " Row Group 1, column 'b': MultiPage(dictionary_page: true, data_pages: [0, 1]) (1856 bytes, 1 requests) [data]",
         "Read Multi:",
         " Row Group 1, column 'c': DictionaryPage (7217 bytes, 1 requests) [data]",
         " Row Group 1, column 'c': DataPage(0) (113 bytes , 1 requests) [data]",
diff --git a/parquet/tests/arrow_reader/row_filter/async.rs b/parquet/tests/arrow_reader/row_filter/async.rs
index 6fa616d714f1..66840bb8147b 100644
--- a/parquet/tests/arrow_reader/row_filter/async.rs
+++ b/parquet/tests/arrow_reader/row_filter/async.rs
@@ -21,7 +21,7 @@ use std::sync::Arc;
 use arrow::{
     array::AsArray,
     compute::{concat_batches, kernels::cmp::eq, or},
-    datatypes::TimestampNanosecondType,
+    datatypes::{Int32Type, TimestampNanosecondType},
 };
 use arrow_array::{
     ArrayRef, BooleanArray, Int8Array, Int32Array, Int64Array, RecordBatch, Scalar, StringArray,
@@ -525,3 +525,112 @@ async fn test_predicate_pushdown_with_skipped_pages() {
         assert_eq!(batch.column(0).as_string(), &expected);
     }
 }
+
+/// Regression test: when multiple predicates are used, the first predicate's
+/// override of the selection strategy (to Mask) must NOT carry forward to
+/// subsequent predicates. Each predicate must get a fresh Auto policy so the
+/// override can detect page skipping for that predicate's specific columns.
+///
+/// Scenario:
+/// - Dense initial RowSelection (alternating select/skip) covers all pages → Auto resolves to Mask
+/// - Predicate 1 evaluates on column A, narrows selection to skip middle pages
+/// - Predicate 2's column B is fetched sparsely with the narrowed selection (missing middle pages)
+/// - Without the fix, the override for predicate 2 returns early (policy=Mask, not Auto),
+///   so Mask is used and tries to read missing pages → "Invalid offset" error
+#[tokio::test]
+async fn test_multi_predicate_mask_policy_carryover() {
+    // 300 rows, 1 row group, 100 rows per page (3 pages)
+    let num_rows = 300usize;
+    let rows_per_page = 100;
+
+    let schema = Arc::new(Schema::new(vec![
+        Field::new("filter_col", DataType::Int32, false),
+        Field::new("value_col", DataType::Int32, false),
+    ]));
+
+    let props = WriterProperties::builder()
+        .set_max_row_group_row_count(Some(num_rows))
+        .set_data_page_row_count_limit(rows_per_page)
+        .set_write_batch_size(rows_per_page)
+        .set_dictionary_enabled(false)
+        .build();
+
+    // filter_col: 0 for first and last 100 rows, 1 for middle 100 rows
+    // value_col: just row index
+    let filter_values: Vec<i32> = (0..num_rows as i32)
+        .map(|i| if (100..200).contains(&i) { 1 } else { 0 })
+        .collect();
+    let value_values: Vec<i32> = (0..num_rows as i32).collect();
+
+    let batch = RecordBatch::try_new(
+        schema.clone(),
+        vec![
+            Arc::new(Int32Array::from(filter_values)) as ArrayRef,
+            Arc::new(Int32Array::from(value_values)) as ArrayRef,
+        ],
+    )
+    .unwrap();
+
+    let mut buffer = Vec::new();
+    let mut writer = ArrowWriter::try_new(&mut buffer, schema.clone(), Some(props)).unwrap();
+    writer.write(&batch).unwrap();
+    writer.close().unwrap();
+    let buffer = Bytes::from(buffer);
+
+    let reader = TestReader::new(buffer);
+    let options = ArrowReaderOptions::default().with_page_index_policy(PageIndexPolicy::Required);
+    let builder = ParquetRecordBatchStreamBuilder::new_with_options(reader, options)
+        .await
+        .unwrap();
+
+    let schema_descr = builder.metadata().file_metadata().schema_descr_ptr();
+
+    // Dense initial selection: Select(1), Skip(1) repeated → triggers Mask strategy
+    // Covers all pages since every page has selected rows
+    let selectors: Vec<RowSelector> = (0..num_rows / 2)
+        .flat_map(|_| vec![RowSelector::select(1), RowSelector::skip(1)])
+        .collect();
+    let selection = RowSelection::from(selectors);
+
+    // Predicate 1 on filter_col: keeps only rows where filter_col == 0
+    // (first 100 and last 100 rows). After this, middle page is excluded.
+    let pred1 = ArrowPredicateFn::new(ProjectionMask::roots(&schema_descr, [0]), |batch| {
+        let col = batch.column(0).as_primitive::<Int32Type>();
+        Ok(BooleanArray::from_iter(
+            col.iter().map(|v| v.map(|val| val == 0)),
+        ))
+    });
+
+    // Predicate 2 on value_col: keeps rows where value_col < 250
+    // This column is fetched AFTER predicate 1 narrows the selection.
+    // Its sparse data will be missing the middle page.
+    let pred2 = ArrowPredicateFn::new(ProjectionMask::roots(&schema_descr, [1]), |batch| {
+        let col = batch.column(0).as_primitive::<Int32Type>();
+        Ok(BooleanArray::from_iter(
+            col.iter().map(|v| v.map(|val| val < 250)),
+        ))
+    });
+
+    let row_filter = RowFilter::new(vec![Box::new(pred1), Box::new(pred2)]);
+
+    // Output projection: both columns
+    let projection = ProjectionMask::roots(&schema_descr, [0, 1]);
+
+    let stream = builder
+        .with_row_filter(row_filter)
+        .with_row_selection(selection)
+        .with_projection(projection)
+        .with_max_predicate_cache_size(0)
+        .build()
+        .unwrap();
+
+    // Without the fix, this panics with:
+    // "Invalid offset in sparse column chunk data: ..., no matching page found."
+    let batches: Vec<RecordBatch> = stream.try_collect().await.unwrap();
+    let batch = concat_batches(&batches[0].schema(), &batches).unwrap();
+
+    // Verify results: rows where filter_col==0 AND value_col<250 AND original alternating selection
+    // That's even-indexed rows in [0,100) with value<250 → rows 0,2,4,...,98 (50 rows)
+    // Plus even-indexed rows in [200,250) with value<250 → rows 200,202,...,248 (25 rows)
+    assert_eq!(batch.num_rows(), 75);
+}
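The regression test hinges on how chained predicates narrow a `RowSelection`: each predicate is evaluated only on rows that survived every earlier stage, so a later predicate's column can be fetched without the pages the earlier predicates eliminated, and the page-skip strategy must be re-derived per predicate. Below is a minimal model of that narrowing using plain boolean vectors instead of the parquet reader types; `apply_predicate` is an illustrative helper, not an API of the crate.

```rust
/// Scatter one predicate's verdicts (one per currently-selected row)
/// back into the surviving positions of the overall selection.
fn apply_predicate(selected: &mut [bool], verdicts: &[bool]) {
    let mut v = verdicts.iter();
    for slot in selected.iter_mut().filter(|s| **s) {
        *slot = *v.next().expect("one verdict per selected row");
    }
}

fn main() {
    // 6 rows, alternating initial selection (like the test's Select(1)/Skip(1))
    let mut sel = vec![true, false, true, false, true, false];
    // Predicate 1 sees rows 0, 2, 4 and keeps the first two
    apply_predicate(&mut sel, &[true, true, false]);
    // Predicate 2 sees only rows 0 and 2; the pages holding dropped rows
    // need never be fetched for its column
    apply_predicate(&mut sel, &[false, true]);
    assert_eq!(sel, vec![false, false, true, false, false, false]);
}
```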
diff --git a/parquet/tests/arrow_reader/row_filter/sync.rs b/parquet/tests/arrow_reader/row_filter/sync.rs
index e59fa392cfd4..77a75220dc2e 100644
--- a/parquet/tests/arrow_reader/row_filter/sync.rs
+++ b/parquet/tests/arrow_reader/row_filter/sync.rs
@@ -206,7 +206,6 @@ fn test_row_filter_full_page_skip_is_handled() {
 /// Without the fix, the list column over-skips by one record, causing
 /// struct children to disagree on record counts.
 #[test]
-#[should_panic(expected = "StructArrayReader out of sync in read_records, expected 1 read, got 0")]
 fn test_row_selection_list_column_v2_page_boundary_skip() {
     use arrow_array::builder::{Int32Builder, ListBuilder};
 
@@ -327,7 +326,6 @@ fn test_row_selection_list_column_v2_page_boundary_skip() {
 /// bug causes one leaf to over-skip by one record while the other stays
 /// correct.
 #[test]
-#[should_panic(expected = "Not all children array length are the same!")]
 fn test_list_struct_page_boundary_desync_produces_length_mismatch() {
     use arrow_array::Array;
     use arrow_array::builder::{Int32Builder, ListBuilder, StringBuilder, StructBuilder};
diff --git a/parquet/tests/arrow_writer.rs b/parquet/tests/arrow_writer.rs
new file mode 100644
index 000000000000..020b4c6267e0
--- /dev/null
+++ b/parquet/tests/arrow_writer.rs
@@ -0,0 +1,50 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Tests for [`ArrowWriter`]
+
+use arrow::array::Float64Array;
+use arrow::datatypes::{DataType, Field, Schema};
+use arrow::record_batch::RecordBatch;
+use parquet::arrow::ArrowWriter;
+use parquet::basic::Encoding;
+use parquet::file::properties::WriterProperties;
+use std::sync::Arc;
+
+#[test]
+#[should_panic(
+    expected = "DeltaBitPackDecoder only supports Int32Type, UInt32Type, Int64Type, and UInt64Type"
+)]
+fn test_delta_bit_pack_type() {
+    let props = WriterProperties::builder()
+        .set_column_encoding("col".into(), Encoding::DELTA_BINARY_PACKED)
+        .build();
+
+    let record_batch = RecordBatch::try_new(
+        Arc::new(Schema::new(vec![Field::new(
+            "col",
+            DataType::Float64,
+            false,
+        )])),
+        vec![Arc::new(Float64Array::from_iter_values(vec![1., 2.]))],
+    )
+    .unwrap();
+
+    let mut buffer = Vec::new();
+    let mut writer = ArrowWriter::try_new(&mut buffer, record_batch.schema(), Some(props)).unwrap();
+    let _ = writer.write(&record_batch);
+}
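`DELTA_BINARY_PACKED` applies only to 32- and 64-bit integer physical types, which is why requesting it for a `Float64` column in the test above trips the decoder. For contrast, here is a sketch of the supported path on an `Int64` column, with dictionary encoding disabled so the requested encoding is actually used for the data pages. This is an illustrative round-trip under those assumptions, not part of the test suite.

```rust
use arrow::array::Int64Array;
use arrow::datatypes::{DataType, Field, Schema};
use arrow::record_batch::RecordBatch;
use bytes::Bytes;
use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
use parquet::arrow::ArrowWriter;
use parquet::basic::Encoding;
use parquet::file::properties::WriterProperties;
use std::sync::Arc;

fn main() {
    // DELTA_BINARY_PACKED is valid for integer columns like Int64
    let props = WriterProperties::builder()
        .set_column_encoding("col".into(), Encoding::DELTA_BINARY_PACKED)
        .set_dictionary_enabled(false)
        .build();

    let schema = Arc::new(Schema::new(vec![Field::new("col", DataType::Int64, false)]));
    let batch = RecordBatch::try_new(
        schema.clone(),
        vec![Arc::new(Int64Array::from(vec![1, 2, 3]))],
    )
    .unwrap();

    let mut buffer = Vec::new();
    let mut writer = ArrowWriter::try_new(&mut buffer, schema, Some(props)).unwrap();
    writer.write(&batch).unwrap();
    writer.close().unwrap();

    // Unlike the Float64 case above, reading back succeeds
    let mut reader = ParquetRecordBatchReaderBuilder::try_new(Bytes::from(buffer))
        .unwrap()
        .build()
        .unwrap();
    let read = reader.next().unwrap().unwrap();
    assert_eq!(read.num_rows(), 3);
}
```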
diff --git a/parquet/tests/geospatial.rs b/parquet/tests/geospatial.rs
index 4f449df920e8..fcc93661ed97 100644
--- a/parquet/tests/geospatial.rs
+++ b/parquet/tests/geospatial.rs
@@ -380,8 +380,8 @@ mod test {
         for i in 0..reader.num_row_groups() {
             let row_group = reader.get_row_group(i).unwrap();
 
-            values.truncate(0);
-            def_levels.truncate(0);
+            values.clear();
+            def_levels.clear();
 
             let mut row_group_out = writer.next_row_group().unwrap();