diff --git a/Readme.md b/Readme.md index debfb41..2e13e5e 100644 --- a/Readme.md +++ b/Readme.md @@ -1,5 +1,5 @@ # netchdf -_last updated: 7/26/2025_ +_last updated: 7/27/2025_ This is a rewrite in Kotlin of parts of the devcdm and netcdf-java libraries. @@ -294,6 +294,10 @@ local to the variable they are referenced by. * Vlen Strings are stored on the heap. Fixed length Strings are kept in byte arrays. This is more or less invisible to the User. +We have very limited example data for the "version 4" data layouts of HDF5 (many thanks to James Mudd and +the [jhdf project](https://github.com/jamesmudd/jhdf) for code and the test data we do have.) +Please carefully check results if you have this kind of data, and send us samples to test! + #### Compare with HDF4 data model * All data access is unified under the netchdf API. diff --git a/core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/BTree1data.kt b/core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/BTree1data.kt index cc9ea87..2cf4004 100644 --- a/core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/BTree1data.kt +++ b/core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/BTree1data.kt @@ -27,12 +27,12 @@ internal class BTree1data( // if other layouts like BTree2data had this interface we could use in chunkConcurrent override fun asSequence(): Sequence = sequence { repeat( tiling.nelems) { - //val startingIndex = tiling.orderToIndex(it.toLong()) - //val indexSpace = IndexSpace(startingIndex, tiling.chunk) yield(findDataChunk(it) ?: missingDataChunk(it, tiling)) } } + fun chunkIterator(): Iterator = asSequence().iterator() + internal fun findDataChunk(order: Int): DataChunk? 
{ return rootNode.findDataChunk(order) } diff --git a/core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/H5chunkReader.kt b/core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/H5chunkReader.kt index 1adf560..c912934 100644 --- a/core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/H5chunkReader.kt +++ b/core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/H5chunkReader.kt @@ -13,7 +13,7 @@ import kotlin.collections.iterator private val debugChunking = false -// DataLayoutSingleChunk4, DataLayoutImplicit4, DataLayoutFixedArray4, DataLayoutExtensibleArray4, DataLayoutBtreeVer2 +// DataLayoutSingleChunk4, DataLayoutImplicit4, DataLayoutFixedArray4, DataLayoutExtensibleArray4, DataLayoutBtreeVer2, DataLayoutBTreeVer1 internal fun H5builder.readChunkedData(v2: Variable, wantSection: Section, index: Iterator): ArrayTyped { val vinfo = v2.spObject as DataContainerVariable val h5type = vinfo.h5type @@ -62,59 +62,7 @@ internal fun H5builder.readChunkedData(v2: Variable, wantSection: Section } } -/* DataLayoutBTreeVer1 (to be removed) -internal fun H5builder.readBtreeVer1(v2: Variable, wantSection: Section): ArrayTyped { - val vinfo = v2.spObject as DataContainerVariable - val h5type = vinfo.h5type - - val elemSize = vinfo.storageDims[vinfo.storageDims.size - 1].toInt() // last one is always the elements size - val datatype = vinfo.h5type.datatype() - - val wantSpace = IndexSpace(wantSection) - val sizeBytes = wantSpace.totalElements * elemSize - if (sizeBytes <= 0 || sizeBytes >= Int.MAX_VALUE) { - throw RuntimeException("Illegal nbytes to read = $sizeBytes") - } - val ba = ByteArray(sizeBytes.toInt()) - - val btree1 = if (vinfo.mdl is DataLayoutBTreeVer1) - BTree1(this, vinfo.dataPos, 1, vinfo.storageDims.size) - else - throw RuntimeException("Unsupprted mdl ${vinfo.mdl}") - - val tiledData = H5TiledData1(btree1, v2.shape, vinfo.storageDims) - val filters = FilterPipeline(v2.name, vinfo.mfp, vinfo.h5type.isBE) - if (debugChunking) println(" readChunkedData 
tiles=${tiledData.tiling}") - - var transferChunks = 0 - val state = OpenFileState(0L, vinfo.h5type.isBE) - for (dataChunk: DataChunk in tiledData.dataChunks(wantSpace)) { // : Iterable - val dataSection = IndexSpace(v2.rank, dataChunk.offsets(), vinfo.storageDims) - val chunker = Chunker(dataSection, wantSpace) // each DataChunkEntry has its own Chunker iteration - if (dataChunk.isMissing()) { - if (debugChunking) println(" missing ${dataChunk.show(tiledData.tiling)}") - chunker.transferMissing(vinfo.fillValue, elemSize, ba) - } else { - if (debugChunking) println(" chunk=${dataChunk.show(tiledData.tiling)}") - state.pos = dataChunk.childAddress() - val chunkData = this.raf.readByteArray(state, dataChunk.chunkSize()) - val filteredData = if (dataChunk.filterMask() == null) chunkData - else filters.apply(chunkData, dataChunk.filterMask()!!) - chunker.transferBA(filteredData, 0, elemSize, ba, 0) - transferChunks += chunker.transferChunks - } - } - - val shape = wantSpace.shape.toIntArray() - - return if (h5type.datatype5 == Datatype5.Vlen) { - this.processVlenIntoArray(h5type, shape, ba, wantSpace.totalElements.toInt(), elemSize) - } else { - this.processDataIntoArray(ba, vinfo.h5type.isBE, datatype, shape, h5type, elemSize) as ArrayTyped - } -} */ - -// DataLayoutBTreeVer1 +/* DataLayoutBTreeVer1 internal fun H5builder.readBtree1data(v2: Variable, wantSection: Section): ArrayTyped { val vinfo = v2.spObject as DataContainerVariable val h5type = vinfo.h5type @@ -130,19 +78,6 @@ internal fun H5builder.readBtree1data(v2: Variable, wantSection: Section) val ba = ByteArray(sizeBytes.toInt()) val btree1 = if (vinfo.mdl is DataLayoutBTreeVer1) { - // internal class BTree1( - // val h5: H5builder, - // val rootNodeAddress: Long, - // val nodeType : Int, // 0 = group/symbol table, 1 = raw data chunks - // val ndimStorage: Int? = null // TODO allowed to be null ?? 
- //) - // BTree1(this, vinfo.dataPos, 1, vinfo.storageDims.size) - // internal class BTree1data( - // val raf: OpenFileExtended, - // rootNodeAddress: Long, - // varShape: LongArray, - // chunkShape: LongArray, - //) val rafext: OpenFileExtended = this.openNewFileExtended() BTree1data(rafext, vinfo.dataPos, v2.shape, vinfo.storageDims) } else { @@ -179,10 +114,9 @@ internal fun H5builder.readBtree1data(v2: Variable, wantSection: Section) } else { this.processDataIntoArray(ba, vinfo.h5type.isBE, datatype, shape, h5type, elemSize) as ArrayTyped } -} +} */ -// DataLayoutBTreeVer1 using chunkIterator -internal fun readBtreeWithChunkIterator(hdf5: Hdf5File, v2: Variable, wantSection: SectionPartial?): ArrayTyped { +internal fun readChunkedDataWithIterator(hdf5: Hdf5File, v2: Variable, wantSection: SectionPartial?): ArrayTyped { val vinfo = v2.spObject as DataContainerVariable val datatype = vinfo.h5type.datatype() diff --git a/core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/Hdf5File.kt b/core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/Hdf5File.kt index 21594e7..383925a 100644 --- a/core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/Hdf5File.kt +++ b/core/src/commonMain/kotlin/com/sunya/netchdf/hdf5/Hdf5File.kt @@ -84,10 +84,14 @@ class Hdf5File(val filename : String, strict : Boolean = false) : Netchdf { } else if (vinfo.mdl is DataLayoutBTreeVer1) { // skip the concurrent read on the hard stuff if ( recurse || (v2.datatype == Datatype.CHAR || v2.datatype == Datatype.COMPOUND || v2.datatype == Datatype.OPAQUE || - v2.datatype == Datatype.STRING || v2.datatype == Datatype.VLEN)) - header.readBtree1data(v2, section) - else - readBtreeWithChunkIterator(this, v2, wantSection) + v2.datatype == Datatype.STRING || v2.datatype == Datatype.VLEN)) { + val btree1 = + BTree1data(header.makeFileExtended(), vinfo.dataPos, v2.shape, vinfo.storageDims) + header.readChunkedData(v2, section, btree1.chunkIterator()) + // header.readBtree1data(v2, section) + } else { + 
readChunkedDataWithIterator(this, v2, wantSection) + } } else if (vinfo.mdl is DataLayoutSingleChunk4) { // header.readSingleChunk(v2, wantSection) @@ -118,9 +122,8 @@ class Hdf5File(val filename : String, strict : Boolean = false) : Netchdf { v2.datatype == Datatype.STRING || v2.datatype == Datatype.VLEN)) { val index = BTree2data(header.makeFileExtended(), v2.name, vinfo.dataPos, v2.shape, vinfo.storageDims) header.readChunkedData(v2, section, index.chunkIterator()) - // header.readBtree1data(v2, section) } else { - readBtreeWithChunkIterator(this, v2, wantSection) + readChunkedDataWithIterator(this, v2, wantSection) } } else {