diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index c6f116c..7426b4b 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -55,6 +55,10 @@ jobs: - ChunkCodecCore/** - ChunkCodecTests/** - LibLz4/** + LibLzma: + - ChunkCodecCore/** + - ChunkCodecTests/** + - LibLzma/** LibSnappy: - ChunkCodecCore/** - ChunkCodecTests/** diff --git a/LibLzma/CHANGELOG.md b/LibLzma/CHANGELOG.md new file mode 100644 index 0000000..65eee55 --- /dev/null +++ b/LibLzma/CHANGELOG.md @@ -0,0 +1,11 @@ +# Release Notes + +All notable changes to this package will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). + +## Unreleased + +### Added + +- Initial release diff --git a/LibLzma/LICENSE b/LibLzma/LICENSE new file mode 100644 index 0000000..af87135 --- /dev/null +++ b/LibLzma/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 Nathan Zimmerberg + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/LibLzma/Project.toml b/LibLzma/Project.toml new file mode 100644 index 0000000..d813070 --- /dev/null +++ b/LibLzma/Project.toml @@ -0,0 +1,16 @@ +name = "ChunkCodecLibLzma" +uuid = "e95d29e5-19c5-4afd-ae0f-beb790efacdf" +version = "0.1.0" +authors = ["nhz2 "] + +[workspace] +projects = ["test"] + +[deps] +ChunkCodecCore = "0b6fb165-00bc-4d37-ab8b-79f91016dbe1" +XZ_jll = "ffd25f8a-64ca-5728-b0f7-c24cf3aae800" + +[compat] +ChunkCodecCore = "1" +XZ_jll = "5" +julia = "1.6" diff --git a/LibLzma/README.md b/LibLzma/README.md new file mode 100644 index 0000000..759432d --- /dev/null +++ b/LibLzma/README.md @@ -0,0 +1,24 @@ +# ChunkCodecLibLzma + +This package implements the ChunkCodec interface for the following encoders and decoders +using the liblzma C library: + +1. `XZCodec`, `XZEncodeOptions`, `XZDecodeOptions` + +## Example + +```julia-repl +julia> using ChunkCodecLibLzma + +julia> data = [0x00, 0x01, 0x02, 0x03]; + +julia> compressed_data = encode(XZEncodeOptions(;preset=UInt32(6), check=ChunkCodecLibLzma.LZMA_CHECK_CRC64), data); + +julia> decompressed_data = decode(XZCodec(), compressed_data; max_size=length(data), size_hint=length(data)); + +julia> data == decompressed_data +true +``` + +The low-level interface is defined in the `ChunkCodecCore` package. 
+ diff --git a/LibLzma/src/ChunkCodecLibLzma.jl b/LibLzma/src/ChunkCodecLibLzma.jl new file mode 100644 index 0000000..29ae0b3 --- /dev/null +++ b/LibLzma/src/ChunkCodecLibLzma.jl @@ -0,0 +1,66 @@ +module ChunkCodecLibLzma + +using XZ_jll: liblzma + +using ChunkCodecCore: + Codec, + EncodeOptions, + DecodeOptions, + check_in_range, + check_contiguous, + grow_dst!, + DecodingError, + MaybeSize, + NOT_SIZE +import ChunkCodecCore: + decode_options, + can_concatenate, + try_decode!, + try_resize_decode!, + try_encode!, + encode_bound, + try_find_decoded_size, + decoded_size_range + +export XZCodec, + XZEncodeOptions, + XZDecodeOptions, + LZMADecodingError + +if VERSION >= v"1.11.0-DEV.469" + eval(Meta.parse(""" + public + LZMA_PRESET_LEVEL_MASK, + LZMA_PRESET_EXTREME, + LZMA_CHECK_NONE, + LZMA_CHECK_CRC32, + LZMA_CHECK_CRC64, + LZMA_CHECK_SHA256 + """)) +end + + + +# reexport ChunkCodecCore +using ChunkCodecCore: ChunkCodecCore, encode, decode +export ChunkCodecCore, encode, decode + + +include("liblzma.jl") + +""" + struct XZCodec <: Codec + XZCodec() + +xz compression using the liblzma C library + +See also [`XZEncodeOptions`](@ref) and [`XZDecodeOptions`](@ref) +""" +struct XZCodec <: Codec +end +decode_options(::XZCodec) = XZDecodeOptions() + +include("encode.jl") +include("decode.jl") + +end # module ChunkCodecLibLzma diff --git a/LibLzma/src/decode.jl b/LibLzma/src/decode.jl new file mode 100644 index 0000000..0d884f0 --- /dev/null +++ b/LibLzma/src/decode.jl @@ -0,0 +1,145 @@ +""" + LZMADecodingError(code) + +Error for data that cannot be decoded. 
+""" +struct LZMADecodingError <: DecodingError + code::Cint +end + +function Base.showerror(io::IO, err::LZMADecodingError) + print(io, "LZMADecodingError: ") + if err.code == LZMA_DATA_ERROR + print(io, "LZMA_DATA_ERROR: data is corrupt") + elseif err.code == LZMA_FORMAT_ERROR + print(io, "LZMA_FORMAT_ERROR: file format not recognized") + elseif err.code == LZMA_OPTIONS_ERROR + print(io, "LZMA_OPTIONS_ERROR: reserved bits set in headers. Data corrupt, or upgrading liblzma may help") + elseif err.code == LZMA_BUF_ERROR + print(io, "LZMA_BUF_ERROR: the compressed stream may be truncated or corrupt") + else + print(io, "unknown lzma error code: ") + print(io, err.code) + end + nothing +end + +""" + struct XZDecodeOptions <: DecodeOptions + XZDecodeOptions(; kwargs...) + +xz decompression using the liblzma C library + +Like the command line tool `xz`, decoding accepts concatenated and padded compressed data and returns the decompressed data concatenated. + +# Keyword Arguments + +- `codec::XZCodec=XZCodec()` +""" +struct XZDecodeOptions <: DecodeOptions + codec::XZCodec +end +function XZDecodeOptions(; + codec::XZCodec=XZCodec(), + kwargs... 
+ ) + XZDecodeOptions(codec) +end +can_concatenate(::XZDecodeOptions) = true + +function try_find_decoded_size(::XZDecodeOptions, src::AbstractVector{UInt8})::Nothing + # Potentially this could be found by parsing through the index + # This is complicated by potential padding and concatenated streams + nothing +end + +function try_decode!(d::XZDecodeOptions, dst::AbstractVector{UInt8}, src::AbstractVector{UInt8}; kwargs...)::MaybeSize + try_resize_decode!(d, dst, src, Int64(length(dst))) +end + +function try_resize_decode!(d::XZDecodeOptions, dst::AbstractVector{UInt8}, src::AbstractVector{UInt8}, max_size::Int64; kwargs...)::MaybeSize + dst_size::Int64 = length(dst) + src_size::Int64 = length(src) + src_left::Int64 = src_size + dst_left::Int64 = dst_size + check_contiguous(dst) + check_contiguous(src) + if isempty(src) + throw(LZMADecodingError(LZMA_BUF_ERROR)) + end + cconv_src = Base.cconvert(Ptr{UInt8}, src) + # We start by allocating our allocator + cconv_allocator = Base.cconvert(Ref{lzma_allocator}, default_allocator()) + GC.@preserve cconv_allocator begin + allocator_p = Base.unsafe_convert(Ref{lzma_allocator}, cconv_allocator) + stream = lzma_stream() + stream.allocator = allocator_p + ret = @ccall liblzma.lzma_stream_decoder( + stream::Ref{lzma_stream}, + typemax(UInt64)::UInt64, + LZMA_CONCATENATED::UInt32, + )::Cint + if ret == LZMA_MEM_ERROR + throw(OutOfMemoryError()) + elseif ret != LZMA_OK + error("Unknown lzma error code: $(ret)") + end + try + while true # Loop for resizing dst + # dst may get resized, so cconvert needs to be redone on each iteration. 
+ cconv_dst = Base.cconvert(Ptr{UInt8}, dst) + GC.@preserve cconv_src cconv_dst begin + src_p = Base.unsafe_convert(Ptr{UInt8}, cconv_src) + dst_p = Base.unsafe_convert(Ptr{UInt8}, cconv_dst) + stream.avail_in = src_left + stream.avail_out = dst_left + stream.next_in = src_p + (src_size - src_left) + stream.next_out = dst_p + (dst_size - dst_left) + ret = @ccall liblzma.lzma_code( + stream::Ref{lzma_stream}, + LZMA_FINISH::Cint, + )::Cint + if ret == LZMA_OK || ret == LZMA_STREAM_END + @assert stream.avail_in ≤ src_left + @assert stream.avail_out ≤ dst_left + src_left = stream.avail_in + dst_left = stream.avail_out + @assert src_left ∈ 0:src_size + @assert dst_left ∈ 0:dst_size + end + if ret == LZMA_OK + # Likely not enough output space + # but also potentially the input is truncated + # Unlike zlib, we can keep trying until we get LZMA_BUF_ERROR + if iszero(dst_left) + # Give more space and try again + # This might result in returning a NOT_SIZE + # when instead the actual issue is that the input is truncated. 
+ local next_size = grow_dst!(dst, max_size) + if isnothing(next_size) + return NOT_SIZE + end + dst_left += next_size - dst_size + dst_size = next_size + @assert dst_left > 0 + end + elseif ret == LZMA_STREAM_END + @assert iszero(src_left) + # yay done return decompressed size + real_dst_size = dst_size - dst_left + @assert real_dst_size ∈ 0:length(dst) + return real_dst_size + elseif ret == LZMA_DATA_ERROR || ret == LZMA_FORMAT_ERROR || ret == LZMA_OPTIONS_ERROR || ret == LZMA_BUF_ERROR + throw(LZMADecodingError(ret)) + elseif ret == LZMA_MEM_ERROR + throw(OutOfMemoryError()) + else + error("Unknown lzma error code: $(ret)") + end + end + end + finally + @ccall liblzma.lzma_end(stream::Ref{lzma_stream})::Cvoid + end + end +end diff --git a/LibLzma/src/encode.jl b/LibLzma/src/encode.jl new file mode 100644 index 0000000..2396974 --- /dev/null +++ b/LibLzma/src/encode.jl @@ -0,0 +1,102 @@ +""" + struct XZEncodeOptions <: EncodeOptions + XZEncodeOptions(; kwargs...) + +xz compression using the liblzma C library + +# Keyword Arguments + +- `codec::XZCodec=XZCodec()` +- `preset::UInt32=UInt32(6)`: Compression preset to use. + + A preset consist of level + number and zero or more flags. Usually flags aren't + used, so preset is simply a number [0, 9] which match + the options -0 ... -9 of the xz command line tool. + Additional flags can be set using bitwise-or with + the preset level number, e.g. `UInt32(6) | LZMA_PRESET_EXTREME`. +- `check::Int32=LZMA_CHECK_CRC64`: Integrity check type to use. + + Available checks are `LZMA_CHECK_NONE`, `LZMA_CHECK_CRC32`, `LZMA_CHECK_CRC64`, and `LZMA_CHECK_SHA256` +""" +struct XZEncodeOptions <: EncodeOptions + codec::XZCodec + preset::UInt32 + check::Int32 +end + +function XZEncodeOptions(; + codec::XZCodec=XZCodec(), + preset::UInt32=UInt32(6), + check::Int32=LZMA_CHECK_CRC64, + kwargs... 
+ ) + check_in_range(Int32(0):LZMA_CHECK_ID_MAX; check) + XZEncodeOptions( + codec, + preset, + check, + ) +end + +function decoded_size_range(::XZEncodeOptions) + max_size = if sizeof(Csize_t) == 8 + typemax(Int64)-Int64(1) + elseif sizeof(Csize_t) == 4 + Int64(typemax(Csize_t)) + else + @assert false "unreachable" + end + Int64(0):Int64(1):max_size +end + +function encode_bound(::XZEncodeOptions, src_size::Int64)::Int64 + if src_size < 0 + Int64(-1) + elseif src_size > typemax(Csize_t) + typemax(Int64) + else + res = @ccall liblzma.lzma_stream_buffer_bound(src_size::Csize_t)::Csize_t + if iszero(res) || res > typemax(Int64) + typemax(Int64) + else + res%Int64 + end + end +end + +function try_encode!(e::XZEncodeOptions, dst::AbstractVector{UInt8}, src::AbstractVector{UInt8}; kwargs...)::MaybeSize + check_contiguous(dst) + check_contiguous(src) + src_size::Int64 = length(src) + dst_size::Int64 = length(dst) + check_in_range(decoded_size_range(e); src_size) + if iszero(dst_size) + return NOT_SIZE + end + out_pos = Ref(Csize_t(0)) + ret = @ccall liblzma.lzma_easy_buffer_encode( + e.preset::UInt32, e.check::Cint, + default_allocator()::Ref{lzma_allocator}, + src::Ptr{UInt8}, src_size::Csize_t, + dst::Ptr{UInt8}, out_pos::Ref{Csize_t}, dst_size::Csize_t + )::Cint + if ret == LZMA_OK + # Encoding was successful. + return Int64(out_pos[]) + elseif ret == LZMA_BUF_ERROR + # Not enough output buffer space. 
+ return NOT_SIZE + elseif ret == LZMA_UNSUPPORTED_CHECK + throw(ArgumentError("Specified integrity check: $(e.check) is not supported")) + elseif ret == LZMA_OPTIONS_ERROR + throw(ArgumentError("Specified preset: $(e.preset) is not supported")) + elseif ret == LZMA_MEM_ERROR + throw(OutOfMemoryError()) + elseif ret == LZMA_DATA_ERROR + # This is usually unreachable since the limits are near 2^63 bytes + throw(ArgumentError("File size limits exceeded")) + else + error("Unknown lzma error code: $(ret)") + end +end diff --git a/LibLzma/src/liblzma.jl b/LibLzma/src/liblzma.jl new file mode 100644 index 0000000..bee6a39 --- /dev/null +++ b/LibLzma/src/liblzma.jl @@ -0,0 +1,258 @@ +# Constants and c wrapper functions ported to Julia from https://github.com/tukaani-project/xz/tree/v5.8.1/src/liblzma/api + +#= +Return values used by several functions in liblzma +=# +const LZMA_OK = Cint(0) +const LZMA_STREAM_END = Cint(1) +const LZMA_NO_CHECK = Cint(2) +const LZMA_UNSUPPORTED_CHECK = Cint(3) +const LZMA_GET_CHECK = Cint(4) +const LZMA_MEM_ERROR = Cint(5) +const LZMA_MEMLIMIT_ERROR = Cint(6) +const LZMA_FORMAT_ERROR = Cint(7) +const LZMA_OPTIONS_ERROR = Cint(8) +const LZMA_DATA_ERROR = Cint(9) +const LZMA_BUF_ERROR = Cint(10) +const LZMA_PROG_ERROR = Cint(11) + +#= +The 'action' argument for lzma_code() +=# +const LZMA_RUN = Cint(0) +const LZMA_SYNC_FLUSH = Cint(1) +const LZMA_FULL_FLUSH = Cint(2) +const LZMA_FULL_BARRIER = Cint(4) +const LZMA_FINISH = Cint(3) + +#= +Custom functions for memory handling +=# +@assert typemax(Csize_t) ≥ typemax(Cint) + +function lzma_alloc(::Ptr{Cvoid}, nmemb::Csize_t, size::Csize_t)::Ptr{Cvoid} + # nmemb is always set to 1 and can be ignored + @assert nmemb == 1 + ccall(:jl_malloc, Ptr{Cvoid}, (Csize_t,), size) +end +function lzma_free(::Ptr{Cvoid}, p::Ptr{Cvoid}) + ccall(:jl_free, Cvoid, (Ptr{Cvoid},), p) +end + +struct lzma_allocator + alloc::Ptr{Cvoid} + free::Ptr{Cvoid} + opaque::Ptr{Cvoid} +end + +function default_allocator() + 
lzma_allocator( + @cfunction(lzma_alloc, Ptr{Cvoid}, (Ptr{Cvoid}, Csize_t, Csize_t)), + @cfunction(lzma_free, Cvoid, (Ptr{Cvoid}, Ptr{Cvoid})), + C_NULL, + ) +end + +mutable struct lzma_stream + next_in::Ptr{UInt8} + avail_in::Csize_t + total_in::UInt64 + + next_out::Ptr{UInt8} + avail_out::Csize_t + total_out::UInt64 + + allocator::Ptr{lzma_allocator} + internal::Ptr{Cvoid} + + reserved_ptr1::Ptr{Cvoid} + reserved_ptr2::Ptr{Cvoid} + reserved_ptr3::Ptr{Cvoid} + reserved_ptr4::Ptr{Cvoid} + + seek_pos::UInt64 + + reserved_int2::UInt64 + reserved_int3::Csize_t + reserved_int4::Csize_t + reserved_enum1::Cint + reserved_enum2::Cint + + function lzma_stream() + new( + C_NULL, 0, 0, + C_NULL, 0, 0, + C_NULL,#default_allocator_ptr, + C_NULL, + C_NULL, C_NULL, C_NULL, C_NULL, + 0, 0, 0, 0, + 0, 0, + ) + end +end + +#= +Type of the integrity check (Check ID) +=# +""" + const LZMA_CHECK_NONE = Cint(0) + +No Check is calculated. + +Size of the Check field: 0 bytes +""" +const LZMA_CHECK_NONE = Cint(0) + +""" + const LZMA_CHECK_CRC32 = Cint(1) + +CRC32 using the polynomial from the IEEE 802.3 standard + +Size of the Check field: 4 bytes +""" +const LZMA_CHECK_CRC32 = Cint(1) + +""" + const LZMA_CHECK_CRC64 = Cint(4) + +CRC64 using the polynomial from the ECMA-182 standard + +Size of the Check field: 8 bytes +""" +const LZMA_CHECK_CRC64 = Cint(4) + +""" + const LZMA_CHECK_SHA256 = Cint(10) + +SHA-256 + +Size of the Check field: 32 bytes +""" +const LZMA_CHECK_SHA256 = Cint(10) + +""" +const LZMA_CHECK_ID_MAX = Cint(15) + +Maximum valid Check ID + +The .xz file format specification specifies 16 Check IDs (0-15). Some +of them are only reserved, that is, no actual Check algorithm has been +assigned. When decoding, liblzma still accepts unknown Check IDs for +future compatibility. If a valid but unsupported Check ID is detected, +liblzma can indicate a warning; see the flags LZMA_TELL_NO_CHECK, +LZMA_TELL_UNSUPPORTED_CHECK, and LZMA_TELL_ANY_CHECK. 
+""" +const LZMA_CHECK_ID_MAX = Cint(15) + +""" + lzma_check_is_supported(check::Cint)::Bool + +Test if the given Check ID is supported. + +LZMA_CHECK_NONE and LZMA_CHECK_CRC32 are always supported (even if +liblzma is built with limited features). + +It is safe to call this with a value that is not in the range [0, 15]; +in that case the return value is always false. + +# Arguments +- `check`: Check ID + +# Returns +- `true` if Check ID is supported by this liblzma build. +- `false` otherwise. +""" +function lzma_check_is_supported(check::Cint)::Bool + @ccall liblzma.lzma_check_is_supported(check::Cint)::Bool +end + +const LZMA_PRESET_DEFAULT = UInt32(6) + +""" + const LZMA_PRESET_LEVEL_MASK = UInt32(0x1F) + +Mask for preset level + +This is useful only if you need to extract the level from the preset +variable. That should be rare. +""" +const LZMA_PRESET_LEVEL_MASK = UInt32(0x1F) + +""" + const LZMA_PRESET_EXTREME = UInt32(1)<<31 + +Extreme compression preset + +This flag modifies the preset to make the encoding significantly slower +while improving the compression ratio only marginally. This is useful +when you don't mind spending time to get as small result as possible. + +This flag doesn't affect the memory usage requirements of the decoder (at +least not significantly). The memory usage of the encoder may be increased +a little but only at the lowest preset levels (0-3). +""" +const LZMA_PRESET_EXTREME = UInt32(1)<<31 + +#= +This flag enables decoding of concatenated files with file formats that +allow concatenating compressed files as is. From the formats currently +supported by liblzma, only the .xz and .lz formats allow concatenated +files. Concatenated files are not allowed with the legacy .lzma format. + +This flag also affects the usage of the 'action' argument for lzma_code(). +When LZMA_CONCATENATED is used, lzma_code() won't return LZMA_STREAM_END +unless LZMA_FINISH is used as 'action'. 
Thus, the application has to set +LZMA_FINISH in the same way as it does when encoding. + +If LZMA_CONCATENATED is not used, the decoders still accept LZMA_FINISH +as 'action' for lzma_code(), but the usage of LZMA_FINISH isn't required. +=# +const LZMA_CONCATENATED = UInt32(0x08) + + +# The following is the original license info from lzma.h and LICENSE + +#= header of lzma.h +/* SPDX-License-Identifier: 0BSD */ + +/** + * \file api/lzma.h + * \brief The public API of liblzma data compression library + * \mainpage + * + * liblzma is a general-purpose data compression library with a zlib-like API. + * The native file format is .xz, but also the old .lzma format and raw (no + * headers) streams are supported. Multiple compression algorithms (filters) + * are supported. Currently LZMA2 is the primary filter. + * + * liblzma is part of XZ Utils . XZ Utils + * includes a gzip-like command line tool named xz and some other tools. + * XZ Utils is developed and maintained by Lasse Collin. + * + * Major parts of liblzma are based on code written by Igor Pavlov, + * specifically the LZMA SDK . + * + * The SHA-256 implementation in liblzma is based on code written by + * Wei Dai in Crypto++ Library . + * + * liblzma is distributed under the BSD Zero Clause License (0BSD). + */ + +/* + * Author: Lasse Collin + */ +=# + +#= contents of COPYING.0BSD +Permission to use, copy, modify, and/or distribute this +software for any purpose with or without fee is hereby granted. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL +WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL +THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR +CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM +LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, +NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN +CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+=# \ No newline at end of file diff --git a/LibLzma/test/Project.toml b/LibLzma/test/Project.toml new file mode 100644 index 0000000..912aeec --- /dev/null +++ b/LibLzma/test/Project.toml @@ -0,0 +1,12 @@ +[deps] +Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" +ChunkCodecCore = "0b6fb165-00bc-4d37-ab8b-79f91016dbe1" +ChunkCodecLibLzma = "e95d29e5-19c5-4afd-ae0f-beb790efacdf" +ChunkCodecTests = "06b1ce50-b741-4199-b118-ba5fe1a70fa7" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[sources] +ChunkCodecCore = {path = "../../ChunkCodecCore"} +ChunkCodecLibLzma = {path = ".."} +ChunkCodecTests = {path = "../../ChunkCodecTests"} diff --git a/LibLzma/test/runtests.jl b/LibLzma/test/runtests.jl new file mode 100644 index 0000000..77b02cc --- /dev/null +++ b/LibLzma/test/runtests.jl @@ -0,0 +1,138 @@ +using Random: Random +using ChunkCodecCore: encode_bound, decoded_size_range, encode, decode, try_encode!, is_size +using ChunkCodecLibLzma: + ChunkCodecLibLzma, + XZCodec, + XZEncodeOptions, + XZDecodeOptions, + LZMADecodingError, + LZMA_CHECK_NONE, + LZMA_CHECK_CRC32, + LZMA_CHECK_CRC64, + LZMA_CHECK_SHA256, + LZMA_PRESET_EXTREME +using ChunkCodecTests: test_codec +using Test: @testset, @test_throws, @test +using Aqua: Aqua + +Aqua.test_all(ChunkCodecLibLzma; persistent_tasks = false) + +Random.seed!(1234) +@testset "default" begin + test_codec(XZCodec(), XZEncodeOptions(), XZDecodeOptions(); trials=5) +end +@testset "preset options" begin + @test_throws ArgumentError encode(XZEncodeOptions(; preset=UInt32(10)), UInt8[]) + for i in 0:9 + test_codec(XZCodec(), XZEncodeOptions(; preset=UInt32(i)), XZDecodeOptions(); trials=5) + end +end +@testset "extreme preset" begin + for i in 0:9 + test_codec(XZCodec(), XZEncodeOptions(; preset=UInt32(i) | LZMA_PRESET_EXTREME), XZDecodeOptions(); trials=5) + end +end +@testset "check options" begin + @test_throws ArgumentError XZEncodeOptions(; check=Int32(-1)) + @test_throws ArgumentError 
XZEncodeOptions(; check=Int32(16)) + @test_throws ArgumentError encode(XZEncodeOptions(; check=Int32(15)), UInt8[]) + for check in [LZMA_CHECK_NONE, LZMA_CHECK_CRC32, LZMA_CHECK_CRC64, LZMA_CHECK_SHA256] + test_codec(XZCodec(), XZEncodeOptions(; check), XZDecodeOptions(); trials=5) + end +end +@testset "concatenated streams" begin + e = XZEncodeOptions() + d = XZDecodeOptions() + u1 = [0x00, 0x01, 0x02] + u2 = [0x03, 0x04, 0x05, 0x06] + u3 = UInt8[] + c1 = encode(e, u1) + c2 = encode(e, u2) + c3 = encode(e, u3) + # Two streams concatenated + @test decode(d, [c1; c2]) == [u1; u2] + # Three streams concatenated + @test decode(d, [c1; c2; c1]) == [u1; u2; u1] + # Empty stream in between + @test decode(d, [c1; c3; c2]) == [u1; u2] + # Multiple empty streams + @test decode(d, [c3; c3; c1; c3; c2; c3]) == [u1; u2] + # Just empty streams + @test decode(d, [c3; c3; c3]) == UInt8[] +end +@testset "padding" begin + e = XZEncodeOptions() + d = XZDecodeOptions() + u1 = [0x00, 0x01, 0x02] + u2 = [0x03, 0x04, 0x05, 0x06] + c1 = encode(e, u1) + c2 = encode(e, u2) + pad4 = zeros(UInt8, 4) + pad8 = zeros(UInt8, 8) + pad12 = zeros(UInt8, 12) + # Padding at end of file (multiple of 4) + @test decode(d, [c1; pad4]) == u1 + @test decode(d, [c1; pad8]) == u1 + @test decode(d, [c1; pad12]) == u1 + # Padding between streams (multiple of 4) + @test decode(d, [c1; pad4; c2]) == [u1; u2] + @test decode(d, [c1; pad8; c2]) == [u1; u2] + @test decode(d, [c1; pad4; c2; pad4]) == [u1; u2] + # Multiple padding sections + @test decode(d, [c1; pad4; pad4; c2]) == [u1; u2] + @test decode(d, [c1; pad4; c2; pad8]) == [u1; u2] +end +@testset "invalid padding" begin + e = XZEncodeOptions() + d = XZDecodeOptions() + u = [0x00, 0x01, 0x02] + c = encode(e, u) + # Padding not a multiple of 4 at end + @test_throws LZMADecodingError decode(d, [c; 0x00]) + @test_throws LZMADecodingError decode(d, [c; 0x00; 0x00]) + @test_throws LZMADecodingError decode(d, [c; 0x00; 0x00; 0x00]) + @test_throws LZMADecodingError 
decode(d, [c; zeros(UInt8, 5)]) + @test_throws LZMADecodingError decode(d, [c; zeros(UInt8, 6)]) + @test_throws LZMADecodingError decode(d, [c; zeros(UInt8, 7)]) + # Padding not a multiple of 4 between streams + @test_throws LZMADecodingError decode(d, [c; 0x00; c]) + @test_throws LZMADecodingError decode(d, [c; 0x00; 0x00; c]) + @test_throws LZMADecodingError decode(d, [c; 0x00; 0x00; 0x00; c]) + @test_throws LZMADecodingError decode(d, [c; zeros(UInt8, 5); c]) + # Padding at beginning of file - not allowed + @test_throws LZMADecodingError decode(d, [zeros(UInt8, 4); c]) + # Just padding (no stream) - should fail + @test_throws LZMADecodingError decode(d, zeros(UInt8, 4)) + @test_throws LZMADecodingError decode(d, zeros(UInt8, 8)) +end +@testset "unexpected eof" begin + e = XZEncodeOptions() + d = XZDecodeOptions() + u = [0x00, 0x01, 0x02] + c = encode(e, u) + @test decode(d, c) == u + for i in 1:length(c) + @test_throws LZMADecodingError(ChunkCodecLibLzma.LZMA_BUF_ERROR) decode(d, c[1:i-1]) + end + @test_throws LZMADecodingError decode(d, u) + c[end] = 0x00 + @test_throws LZMADecodingError decode(d, c) + @test_throws LZMADecodingError decode(d, [encode(e, u); c]) + @test_throws LZMADecodingError decode(d, [encode(e, u); 0x00]) +end +@testset "errors" begin + @test sprint(Base.showerror, LZMADecodingError(ChunkCodecLibLzma.LZMA_BUF_ERROR)) == + "LZMADecodingError: LZMA_BUF_ERROR: the compressed stream may be truncated or corrupt" + @test sprint(Base.showerror, LZMADecodingError(ChunkCodecLibLzma.LZMA_DATA_ERROR)) == + "LZMADecodingError: LZMA_DATA_ERROR: data is corrupt" + @test sprint(Base.showerror, LZMADecodingError(ChunkCodecLibLzma.LZMA_FORMAT_ERROR)) == + "LZMADecodingError: LZMA_FORMAT_ERROR: file format not recognized" + @test sprint(Base.showerror, LZMADecodingError(ChunkCodecLibLzma.LZMA_OPTIONS_ERROR)) == + "LZMADecodingError: LZMA_OPTIONS_ERROR: reserved bits set in headers. 
Data corrupt, or upgrading liblzma may help" + @test sprint(Base.showerror, LZMADecodingError(-100)) == + "LZMADecodingError: unknown lzma error code: -100" +end +@testset "not enough dst space try_encode!" begin + @test !is_size(try_encode!(XZEncodeOptions(), UInt8[], UInt8[])) + @test !is_size(try_encode!(XZEncodeOptions(), UInt8[], UInt8[0x00])) +end diff --git a/Project.toml b/Project.toml index 26ec5c1..41ccaf8 100644 --- a/Project.toml +++ b/Project.toml @@ -8,6 +8,7 @@ projects = [ "LibBrotli", "LibBzip2", "LibLz4", + "LibLzma", "LibSnappy", "LibZlib", "LibZstd", diff --git a/test/CondaPkg.toml b/test/CondaPkg.toml index 6e83b7f..6a1d65b 100644 --- a/test/CondaPkg.toml +++ b/test/CondaPkg.toml @@ -1,4 +1,4 @@ [pip.deps] -imagecodecs = "==2025.8.2" +imagecodecs = "==2025.11.11" hdf5plugin = "==5.1.0" h5py = "==3.13.0" diff --git a/test/Project.toml b/test/Project.toml index d2349d2..2a91a3f 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -9,6 +9,7 @@ ChunkCodecLibBlosc = "c6a955be-ab7f-4fbb-b38f-caf93db6b928" ChunkCodecLibBrotli = "653b0ff7-85b5-4442-93c1-dcc330d3ec7d" ChunkCodecLibBzip2 = "2b723af9-f480-4e8d-a1e4-4a9f5a906122" ChunkCodecLibLz4 = "7e9cc85e-5614-42a3-ad86-b78f920b38a5" +ChunkCodecLibLzma = "e95d29e5-19c5-4afd-ae0f-beb790efacdf" ChunkCodecLibSnappy = "eac87354-86d5-4a5b-ab5f-a6ee56b239b3" ChunkCodecLibZlib = "4c0bbee4-addc-4d73-81a0-b6caacae83c8" ChunkCodecLibZstd = "55437552-ac27-4d47-9aa3-63184e8fd398" @@ -34,6 +35,7 @@ ChunkCodecLibBlosc = {path = "../LibBlosc"} ChunkCodecLibBrotli = {path = "../LibBrotli"} ChunkCodecLibBzip2 = {path = "../LibBzip2"} ChunkCodecLibLz4 = {path = "../LibLz4"} +ChunkCodecLibLzma = {path = "../LibLzma"} ChunkCodecLibSnappy = {path = "../LibSnappy"} ChunkCodecLibZlib = {path = "../LibZlib"} ChunkCodecLibZstd = {path = "../LibZstd"} diff --git a/test/imagecodecs-compat.jl b/test/imagecodecs-compat.jl index 40c61d0..3b1202c 100644 --- a/test/imagecodecs-compat.jl +++ b/test/imagecodecs-compat.jl @@ -6,6 
+6,7 @@ using ChunkCodecLibBrotli, ChunkCodecLibBzip2, ChunkCodecLibLz4, + ChunkCodecLibLzma, ChunkCodecLibSnappy, ChunkCodecLibZlib, ChunkCodecLibZstd, @@ -14,6 +15,7 @@ using ChunkCodecTests: rand_test_data using Test codecs = [ + (ChunkCodecLibLzma.XZEncodeOptions(), ("lzma", (;)), 50), ( ChunkCodecBitshuffle.BShufEncodeOptions(codec=ChunkCodecBitshuffle.BShufCodec( 1, diff --git a/test/jet.jl b/test/jet.jl index 2d062f3..6f5f649 100644 --- a/test/jet.jl +++ b/test/jet.jl @@ -9,6 +9,7 @@ codec_packages = [ :ChunkCodecLibBrotli, :ChunkCodecLibBzip2, :ChunkCodecLibLz4, + :ChunkCodecLibLzma, :ChunkCodecLibSnappy, :ChunkCodecLibZlib, :ChunkCodecLibZstd,