diff --git a/CMakeLists.txt b/CMakeLists.txt
index 72acce71e0..09f2d11095 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -514,6 +514,8 @@ find_package(ZLIB REQUIRED)
 
 find_package(BZip2 REQUIRED)
 
+find_package(ZSTD REQUIRED)
+
 # Disable CMAKE_FIND_PACKAGE_PREFER_CONFIG, temporarily, because
 # we don't presently support the using libxml2's Config.cmake
 set(PACKAGE_PREFER_CONFIG_BAK ${CMAKE_FIND_PACKAGE_PREFER_CONFIG})
diff --git a/INSTALL.md b/INSTALL.md
index fe91ffa26e..12e2df72b4 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -39,6 +39,7 @@ configuration options.
     - [`libcheck`](#libcheck)
     - [`bzip2`](#bzip2)
     - [`zlib`](#zlib)
+    - [`zstd`](#zstd)
     - [`libxml2`](#libxml2)
     - [`libpcre2`](#libpcre2)
     - [`openssl` (`libcrypto`, `libssl`)](#openssl-libcrypto-libssl)
@@ -114,6 +115,7 @@ libclamav requires these library dependencies:
 
 - `libbz2` / `bzip2`
 - `libz` / `zlib`
+- `libzstd` / `zstd`
 - `libxml2`
 - `libpcre2`
 - `openssl`
@@ -569,6 +571,13 @@ But if you:
   -D ZLIB_LIBRARY="_filepath of zlib library_"
 ```
 
+### `zstd`
+
+```sh
+  -D ZSTD_INCLUDE_DIR="_filepath of zstd header directory_"
+  -D ZSTD_LIBRARY="_filepath of zstd library_"
+```
+
 ### `libxml2`
 
 ```sh
diff --git a/cmake/FindZSTD.cmake b/cmake/FindZSTD.cmake
new file mode 100644
index 0000000000..9e93080714
--- /dev/null
+++ b/cmake/FindZSTD.cmake
@@ -0,0 +1,142 @@
+# Distributed under the OSI-approved BSD 3-Clause License.  See accompanying
+# file Copyright.txt or https://cmake.org/licensing for details.
+
+#[=======================================================================[.rst:
+FindZSTD
+--------
+
+Finds the Zstandard (zstd) library.
+
+Imported Targets
+^^^^^^^^^^^^^^^^
+
+This module provides the following imported targets, if found:
+
+``ZSTD::zstd``
+  The Zstandard library
+
+Result Variables
+^^^^^^^^^^^^^^^^
+
+This will define the following variables:
+
+``ZSTD_FOUND``
+  True if the system has the zstd library.
+``ZSTD_VERSION``
+  The version of the zstd library which was found.
+``ZSTD_INCLUDE_DIRS``
+  Include directories needed to use zstd.
+``ZSTD_LIBRARIES``
+  Libraries needed to link to zstd.
+``ZSTD_DEFINITIONS``
+  Compiler options reported by pkg-config for zstd, if any.
+
+Cache Variables
+^^^^^^^^^^^^^^^
+
+The following cache variables may also be set:
+
+``ZSTD_INCLUDE_DIR``
+  The directory containing ``zstd.h``.
+``ZSTD_LIBRARY``
+  The path to the zstd library.
+
+#]=======================================================================]
+
+# pkg-config is optional; its results only seed the searches below.
+find_package(PkgConfig QUIET)
+if(PKG_CONFIG_FOUND)
+  pkg_check_modules(PC_ZSTD QUIET libzstd)
+endif()
+
+# HINTS are searched before the default system locations, so the directory
+# reported by pkg-config takes precedence, as it does for the libraries below.
+find_path(ZSTD_INCLUDE_DIR
+  NAMES zstd.h
+  HINTS ${PC_ZSTD_INCLUDE_DIRS}
+  PATH_SUFFIXES zstd include/zstd
+)
+
+# A caller-supplied ZSTD_LIBRARY skips the library search entirely.
+if(ZSTD_LIBRARY)
+  set(ZSTD_LIBRARIES "${ZSTD_LIBRARY}")
+endif()
+
+if(NOT ZSTD_LIBRARIES)
+  find_library(ZSTD_LIBRARY_RELEASE
+    NAMES zstd NAMES_PER_DIR HINTS ${PC_ZSTD_LIBRARY_DIRS} PATH_SUFFIXES lib)
+  find_library(ZSTD_LIBRARY_DEBUG
+    NAMES zstdd NAMES_PER_DIR HINTS ${PC_ZSTD_LIBRARY_DIRS} PATH_SUFFIXES lib)
+
+  include(SelectLibraryConfigurations)
+  select_library_configurations(ZSTD)
+else()
+  file(TO_CMAKE_PATH "${ZSTD_LIBRARIES}" ZSTD_LIBRARIES)
+endif()
+
+# Use the version reported by pkg-config when there is one; otherwise read the
+# version macros from zstd.h so ZSTD_VERSION is still set without pkg-config.
+set(ZSTD_VERSION "${PC_ZSTD_VERSION}")
+if(NOT ZSTD_VERSION AND ZSTD_INCLUDE_DIR AND EXISTS "${ZSTD_INCLUDE_DIR}/zstd.h")
+  set(_zstd_version_parts "")
+  foreach(_zstd_part MAJOR MINOR RELEASE)
+    file(STRINGS "${ZSTD_INCLUDE_DIR}/zstd.h" _zstd_define
+      REGEX "^#[ \t]*define[ \t]+ZSTD_VERSION_${_zstd_part}[ \t]+[0-9]+")
+    if(_zstd_define MATCHES "ZSTD_VERSION_${_zstd_part}[ \t]+([0-9]+)")
+      list(APPEND _zstd_version_parts "${CMAKE_MATCH_1}")
+    endif()
+  endforeach()
+  string(REPLACE ";" "." ZSTD_VERSION "${_zstd_version_parts}")
+  unset(_zstd_version_parts)
+  unset(_zstd_define)
+  unset(_zstd_part)
+endif()
+
+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(ZSTD
+  FOUND_VAR ZSTD_FOUND
+  REQUIRED_VARS
+    ZSTD_LIBRARIES
+    ZSTD_INCLUDE_DIR
+  VERSION_VAR ZSTD_VERSION
+)
+
+if(ZSTD_FOUND)
+  set(ZSTD_INCLUDE_DIRS ${ZSTD_INCLUDE_DIR})
+  set(ZSTD_DEFINITIONS ${PC_ZSTD_CFLAGS_OTHER})
+
+  if(NOT TARGET ZSTD::zstd)
+    add_library(ZSTD::zstd UNKNOWN IMPORTED)
+    set_target_properties(ZSTD::zstd PROPERTIES
+      INTERFACE_COMPILE_OPTIONS "${PC_ZSTD_CFLAGS_OTHER}"
+      INTERFACE_INCLUDE_DIRECTORIES "${ZSTD_INCLUDE_DIRS}")
+
+    if(ZSTD_LIBRARY_RELEASE)
+      set_property(TARGET ZSTD::zstd APPEND PROPERTY
+        IMPORTED_CONFIGURATIONS RELEASE)
+      set_target_properties(ZSTD::zstd PROPERTIES
+        IMPORTED_LOCATION_RELEASE "${ZSTD_LIBRARY_RELEASE}"
+      )
+    endif()
+
+    if(ZSTD_LIBRARY_DEBUG)
+      set_property(TARGET ZSTD::zstd APPEND PROPERTY
+        IMPORTED_CONFIGURATIONS DEBUG)
+      set_target_properties(ZSTD::zstd PROPERTIES
+        IMPORTED_LOCATION_DEBUG "${ZSTD_LIBRARY_DEBUG}"
+      )
+    endif()
+
+    # Neither per-configuration library is known (e.g. because ZSTD_LIBRARY
+    # was supplied by the caller), so point the target at ZSTD_LIBRARY.
+    if(NOT ZSTD_LIBRARY_RELEASE AND NOT ZSTD_LIBRARY_DEBUG)
+      set_property(TARGET ZSTD::zstd APPEND PROPERTY
+        IMPORTED_LOCATION "${ZSTD_LIBRARY}")
+    endif()
+  endif()
+endif()
+
+mark_as_advanced(
+  ZSTD_INCLUDE_DIR
+  ZSTD_LIBRARY
+)
diff --git a/libclamav/CMakeLists.txt b/libclamav/CMakeLists.txt
index bcee81b7cf..c7e8536f7b 100644
--- a/libclamav/CMakeLists.txt
+++ b/libclamav/CMakeLists.txt
@@ -446,6 +446,7 @@ if(ENABLE_SHARED_LIB)
                 OpenSSL::Crypto
                 ZLIB::ZLIB
                 BZip2::BZip2
+                ZSTD::zstd
                 PCRE2::pcre2
                 LibXml2::LibXml2
                 JSONC::jsonc )
@@ -570,6 +571,7 @@ if(ENABLE_STATIC_LIB)
             OpenSSL::Crypto
             ZLIB::ZLIB
             BZip2::BZip2
+            ZSTD::zstd
             PCRE2::pcre2
             LibXml2::LibXml2
             JSONC::jsonc )
diff --git a/libclamav/dconf.c b/libclamav/dconf.c
index e68c99f970..c5e4484542 100644
--- a/libclamav/dconf.c
+++ b/libclamav/dconf.c
@@ -109,6 +109,7 @@ static struct dconf_module modules[] = {
     {"ARCHIVE", "UDF", ARCH_CONF_UDF, 1},
     {"ARCHIVE", "LHA", ARCH_CONF_LHA_LZH, 1},
     {"ARCHIVE", "ALZ", ARCH_CONF_ALZ, 1},
+    {"ARCHIVE", "ZSTD", ARCH_CONF_ZSTD, 1},
 
     {"DOCUMENT", "HTML", DOC_CONF_HTML, 1},
     {"DOCUMENT", "RTF", DOC_CONF_RTF, 1},
diff --git a/libclamav/dconf.h b/libclamav/dconf.h
index 5bc2d5a7ec..0b6bb7562e 100644
--- a/libclamav/dconf.h
+++ b/libclamav/dconf.h
@@ -99,6 +99,7 @@ struct cli_dconf {
 #define ARCH_CONF_UDF 0x8000000
 #define ARCH_CONF_LHA_LZH 0x10000000
 #define ARCH_CONF_ALZ 0x20000000
+#define ARCH_CONF_ZSTD 0x40000000
 
 /* Document flags */
 #define DOC_CONF_HTML 0x1
diff --git a/libclamav/filetypes.c b/libclamav/filetypes.c
index 07c2253342..8ec51d8129 100644
--- a/libclamav/filetypes.c
+++ b/libclamav/filetypes.c
@@ -143,6 +143,7 @@ static const struct ftmap_s {
     { "CL_TYPE_PYTHON_COMPILED", CL_TYPE_PYTHON_COMPILED },
     { "CL_TYPE_LHA_LZH", CL_TYPE_LHA_LZH },
     { "CL_TYPE_AI_MODEL", CL_TYPE_AI_MODEL },
+    { "CL_TYPE_ZSTD", CL_TYPE_ZSTD },
     { NULL, CL_TYPE_IGNORED }
 };
 // clang-format on
diff --git
a/libclamav/filetypes.h b/libclamav/filetypes.h
index 68cfa097e0..d963715f28 100644
--- a/libclamav/filetypes.h
+++ b/libclamav/filetypes.h
@@ -96,6 +96,7 @@ typedef enum cli_file {
     CL_TYPE_PYTHON_COMPILED,
     CL_TYPE_LHA_LZH,
     CL_TYPE_AI_MODEL,
+    CL_TYPE_ZSTD,
 
     /* Section for partition types */
     CL_TYPE_PART_ANY, /* unknown partition type */
diff --git a/libclamav/filetypes_int.h b/libclamav/filetypes_int.h
index ac50113783..10f39ea831 100644
--- a/libclamav/filetypes_int.h
+++ b/libclamav/filetypes_int.h
@@ -163,6 +163,7 @@ static const char *ftypes_int[] = {
     "0:0:78617221:XAR container file:CL_TYPE_ANY:CL_TYPE_XAR:75",
     "1:EOF-512:6b6f6c79:DMG container file:CL_TYPE_ANY:CL_TYPE_DMG:75",
     "0:0:fd377a585a00:XZ container file:CL_TYPE_ANY:CL_TYPE_XZ:76",
+    "0:0:28b52ffd:Zstandard compressed file:CL_TYPE_ANY:CL_TYPE_ZSTD:76",
     "4:1024:482B0004:HFS+ partition:CL_TYPE_PART_ANY:CL_TYPE_PART_HFSPLUS:75",
     "4:1024:48580005:HFSX partition:CL_TYPE_PART_ANY:CL_TYPE_PART_HFSPLUS:75",
     "1:0:3c3f786d6c2076657273696f6e3d22312e3022{0-1024}3c576f726b626f6f6b:Microsoft Excel 2003 XML Document:CL_TYPE_ANY:CL_TYPE_XML_XL:80",
diff --git a/libclamav/scanners.c b/libclamav/scanners.c
index b282662169..da7d1a9530 100644
--- a/libclamav/scanners.c
+++ b/libclamav/scanners.c
@@ -52,6 +52,7 @@
 #define DCONF_OTHER ctx->dconf->other
 
 #include <zlib.h>
+#include <zstd.h>
 
 #include "clamav_rust.h"
 #include "clamav.h"
@@ -1420,6 +1421,134 @@ static cl_error_t cli_scanxz(cli_ctx *ctx)
     return ret;
 }
 
+/**
+ * @brief Decompress a Zstandard stream to a temp file and scan the result.
+ *
+ * The current fmap is read in chunks of up to ZSTD_DStreamInSize() bytes and
+ * the decompressed output is written to a temp file created in
+ * ctx->this_layer_tmpdir. Concatenated frames are decoded back to back. If
+ * decompression fails or a scan limit is exceeded, whatever was written so
+ * far is still scanned.
+ *
+ * @param ctx   The scan context.
+ * @return cl_error_t CL_EMEM or CL_EOPEN if the decoder can't be set up,
+ *                    CL_EWRITE if the temp file can't be written,
+ *                    CL_EUNLINK if the temp file can't be removed after an
+ *                    otherwise successful scan, else the result of
+ *                    cli_gentempfd() or cli_magic_scan_desc().
+ */
+static cl_error_t cli_scanzstd(cli_ctx *ctx)
+{
+    cl_error_t ret = CL_SUCCESS;
+    int fd = -1;
+    char *tmpname = NULL;
+    size_t off = 0;
+    unsigned long size = 0;
+    ZSTD_DStream *dstrm = NULL;
+    size_t ores = 0;
+    size_t const obuf_size = ZSTD_DStreamOutSize();
+    size_t const ibuf_size = ZSTD_DStreamInSize();
+    unsigned char *obuf = NULL;
+    size_t avail;
+    const void *next_in;
+    ZSTD_inBuffer in;
+    ZSTD_outBuffer out;
+
+    cli_dbgmsg("in cli_scanzstd()\n");
+
+    dstrm = ZSTD_createDStream();
+    if (dstrm == NULL) {
+        cli_errmsg("cli_scanzstd: failed to create ZSTD_DStream\n");
+        return CL_EMEM;
+    }
+
+    ores = ZSTD_initDStream(dstrm);
+    if (ZSTD_isError(ores)) {
+        cli_errmsg("cli_scanzstd: ZSTD_initDStream failed: %s\n", ZSTD_getErrorName(ores));
+        ZSTD_freeDStream(dstrm);
+        return CL_EOPEN;
+    }
+
+    obuf = malloc(obuf_size);
+    if (obuf == NULL) {
+        cli_errmsg("cli_scanzstd: no memory for decompress buffer\n");
+        ZSTD_freeDStream(dstrm);
+        return CL_EMEM;
+    }
+
+    if ((ret = cli_gentempfd(ctx->this_layer_tmpdir, &tmpname, &fd))) {
+        cli_errmsg("cli_scanzstd: Can't generate temporary file\n");
+        free(obuf);
+        ZSTD_freeDStream(dstrm);
+        return ret;
+    }
+    cli_dbgmsg("cli_scanzstd: decompressing to file %s\n", tmpname);
+
+    do {
+        next_in = fmap_need_off_once_len(ctx->fmap, off, ibuf_size, &avail);
+
+        if (!next_in || avail == 0) {
+            break;
+        }
+        off += avail;
+
+        in.src = next_in;
+        in.size = avail;
+        in.pos = 0;
+
+        do {
+            out.dst = obuf;
+            out.size = obuf_size;
+            out.pos = 0;
+
+            ores = ZSTD_decompressStream(dstrm, &out, &in);
+            if (ZSTD_isError(ores)) {
+                cli_dbgmsg("cli_scanzstd: decompress error: %s\n", ZSTD_getErrorName(ores));
+                /* Still scan whatever we've decompressed so far */
+                goto zstd_scan;
+            }
+
+            if (out.pos > 0) {
+                if (cli_writen(fd, obuf, out.pos) != out.pos) {
+                    cli_errmsg("cli_scanzstd: Can't write to file\n");
+                    ret = CL_EWRITE;
+                    goto zstd_exit;
+                }
+                size += out.pos;
+
+                if (cli_checklimits("cli_scanzstd", ctx, size, 0, 0) != CL_SUCCESS) {
+                    cli_warnmsg("cli_scanzstd: decompress file size exceeds limits - "
+                                "only scanning %lu bytes\n",
+                                size);
+                    goto zstd_scan;
+                }
+            }
+
+            if (ores == 0) {
+                /* frame complete; reset to handle concatenated frames */
+                ZSTD_DCtx_reset(dstrm, ZSTD_reset_session_only);
+            }
+        } while (in.pos < in.size || out.pos == out.size);
+    } while (1);
+
+zstd_scan:
+    if (ret == CL_SUCCESS) {
+        ret = cli_magic_scan_desc(fd, tmpname, ctx, NULL, LAYER_ATTRIBUTES_NONE);
+    }
+
+zstd_exit:
+    ZSTD_freeDStream(dstrm);
+    close(fd);
+    if (!ctx->engine->keeptmp) {
+        if (cli_unlink(tmpname) && ret == CL_SUCCESS) {
+            ret = CL_EUNLINK;
+        }
+    }
+    free(tmpname);
+    free(obuf);
+    return ret;
+}
+
 static cl_error_t cli_scanszdd(cli_ctx *ctx)
 {
     int ofd;
@@ -4854,6 +4983,11 @@ cl_error_t cli_magic_scan(cli_ctx *ctx, cli_file_t type)
                 ret = cli_scanxz(ctx);
             break;
 
+        case CL_TYPE_ZSTD:
+            if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_ZSTD))
+                ret = cli_scanzstd(ctx);
+            break;
+
         case CL_TYPE_GPT:
             if (SCAN_PARSE_ARCHIVE && (DCONF_ARCH & ARCH_CONF_GPT))
                 ret = cli_scangpt(ctx, 0);
diff --git a/libclamav_rust/src/sys.rs b/libclamav_rust/src/sys.rs
index 085dcf914c..7616d772d4 100644
--- a/libclamav_rust/src/sys.rs
+++ b/libclamav_rust/src/sys.rs
@@ -369,36 +369,37 @@ pub const cli_file_CL_TYPE_ONENOTE: cli_file = 554;
 pub const cli_file_CL_TYPE_PYTHON_COMPILED: cli_file = 555;
 pub const cli_file_CL_TYPE_LHA_LZH: cli_file = 556;
 pub const cli_file_CL_TYPE_AI_MODEL: cli_file = 557;
-pub const cli_file_CL_TYPE_PART_ANY: cli_file = 558;
-pub const cli_file_CL_TYPE_PART_HFSPLUS: cli_file = 559;
-pub const cli_file_CL_TYPE_MBR: cli_file = 560;
-pub const cli_file_CL_TYPE_HTML: cli_file = 561;
-pub const cli_file_CL_TYPE_MAIL: cli_file = 562;
-pub const cli_file_CL_TYPE_SFX: cli_file = 563;
-pub const cli_file_CL_TYPE_ZIPSFX: cli_file = 564;
-pub const cli_file_CL_TYPE_RARSFX: cli_file = 565;
-pub const cli_file_CL_TYPE_7ZSFX: cli_file = 566;
-pub const cli_file_CL_TYPE_CABSFX: cli_file = 567;
-pub const cli_file_CL_TYPE_ARJSFX: cli_file = 568;
-pub const cli_file_CL_TYPE_EGGSFX: cli_file = 569;
-pub const cli_file_CL_TYPE_NULSFT: cli_file = 570;
-pub const cli_file_CL_TYPE_AUTOIT: cli_file = 571;
-pub const cli_file_CL_TYPE_ISHIELD_MSI: cli_file = 572;
-pub const cli_file_CL_TYPE_ISO9660: cli_file = 573;
-pub const cli_file_CL_TYPE_DMG: cli_file = 574;
-pub const cli_file_CL_TYPE_GPT: cli_file = 575;
-pub const cli_file_CL_TYPE_APM: cli_file = 576;
-pub const cli_file_CL_TYPE_XDP: cli_file = 577;
-pub const cli_file_CL_TYPE_XML_WORD: cli_file = 578;
-pub const cli_file_CL_TYPE_XML_XL: cli_file = 579;
-pub const cli_file_CL_TYPE_XML_HWP: cli_file = 580;
-pub const cli_file_CL_TYPE_HWPOLE2: cli_file = 581;
-pub const cli_file_CL_TYPE_MHTML: cli_file = 582;
-pub const cli_file_CL_TYPE_LNK: cli_file = 583;
-pub const cli_file_CL_TYPE_UDF: cli_file = 584;
-pub const cli_file_CL_TYPE_ALZ: cli_file = 585;
-pub const cli_file_CL_TYPE_OTHER: cli_file = 586;
-pub const cli_file_CL_TYPE_IGNORED: cli_file = 587;
+pub const cli_file_CL_TYPE_ZSTD: cli_file = 558;
+pub const cli_file_CL_TYPE_PART_ANY: cli_file = 559;
+pub const cli_file_CL_TYPE_PART_HFSPLUS: cli_file = 560;
+pub const cli_file_CL_TYPE_MBR: cli_file = 561;
+pub const cli_file_CL_TYPE_HTML: cli_file = 562;
+pub const cli_file_CL_TYPE_MAIL: cli_file = 563;
+pub const cli_file_CL_TYPE_SFX: cli_file = 564;
+pub const cli_file_CL_TYPE_ZIPSFX: cli_file = 565;
+pub const cli_file_CL_TYPE_RARSFX: cli_file = 566;
+pub const cli_file_CL_TYPE_7ZSFX: cli_file = 567;
+pub const cli_file_CL_TYPE_CABSFX: cli_file = 568;
+pub const cli_file_CL_TYPE_ARJSFX: cli_file = 569;
+pub const cli_file_CL_TYPE_EGGSFX: cli_file = 570;
+pub const cli_file_CL_TYPE_NULSFT: cli_file = 571;
+pub const cli_file_CL_TYPE_AUTOIT: cli_file = 572;
+pub const cli_file_CL_TYPE_ISHIELD_MSI: cli_file = 573;
+pub const cli_file_CL_TYPE_ISO9660: cli_file = 574;
+pub const cli_file_CL_TYPE_DMG: cli_file = 575;
+pub const cli_file_CL_TYPE_GPT: cli_file = 576;
+pub const cli_file_CL_TYPE_APM: cli_file = 577;
+pub const cli_file_CL_TYPE_XDP: cli_file = 578;
+pub const cli_file_CL_TYPE_XML_WORD: cli_file = 579;
+pub const cli_file_CL_TYPE_XML_XL: cli_file = 580;
+pub const cli_file_CL_TYPE_XML_HWP: cli_file = 581;
+pub const cli_file_CL_TYPE_HWPOLE2: cli_file = 582;
+pub const cli_file_CL_TYPE_MHTML: cli_file = 583;
+pub const cli_file_CL_TYPE_LNK: cli_file = 584;
+pub const cli_file_CL_TYPE_UDF: cli_file = 585;
+pub const cli_file_CL_TYPE_ALZ: cli_file = 586;
+pub const cli_file_CL_TYPE_OTHER: cli_file = 587;
+pub const cli_file_CL_TYPE_IGNORED: cli_file = 588;
 pub type cli_file = ::std::os::raw::c_uint;
 pub use self::cli_file as cli_file_t;
 #[repr(C)]
diff --git a/unit_tests/clamscan/zstd_test.py b/unit_tests/clamscan/zstd_test.py
new file mode 100644
index 0000000000..547cbeed7b
--- /dev/null
+++ b/unit_tests/clamscan/zstd_test.py
@@ -0,0 +1,63 @@
+# Copyright (C) 2020-2026 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
+
+"""
+Run clamscan tests for Zstandard (zstd) compressed files.
+"""
+
+import sys
+
+sys.path.append('../unit_tests')
+import testcase
+
+
+class TC(testcase.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        super(TC, cls).setUpClass()
+
+    @classmethod
+    def tearDownClass(cls):
+        super(TC, cls).tearDownClass()
+
+    def setUp(self):
+        super(TC, self).setUp()
+
+    def tearDown(self):
+        super(TC, self).tearDown()
+        self.verify_valgrind_log()
+
+    def test_zstd(self):
+        self.step_name('Test scanning a zstd compressed file')
+
+        testfile = TC.path_source / 'unit_tests' / 'input' / 'other_scanfiles' / 'zstd' / 'testfile.txt.zst'
+        command = '{valgrind} {valgrind_args} {clamscan} -d {path_db} {testfile}'.format(
+            valgrind=TC.valgrind, valgrind_args=TC.valgrind_args, clamscan=TC.clamscan,
+            path_db=TC.path_source / 'unit_tests' / 'input' / 'other_sigs' / 'zstd.hdb',
+            testfile=testfile,
+        )
+        output = self.execute_command(command)
+
+        assert output.ec == 1  # virus
+
+        expected_results = [
+            'ZSTD_TEST_FILE.UNOFFICIAL FOUND',
+        ]
+        self.verify_output(output.out, expected=expected_results)
+
+    def test_zstd_concatenated(self):
+        self.step_name('Test scanning a zstd file with concatenated frames')
+
+        testfile = TC.path_source / 'unit_tests' / 'input' / 'other_scanfiles' / 'zstd' / 'concat.txt.zst'
+        command = '{valgrind} {valgrind_args} {clamscan} -d {path_db} {testfile}'.format(
+            valgrind=TC.valgrind, valgrind_args=TC.valgrind_args, clamscan=TC.clamscan,
+            path_db=TC.path_source / 'unit_tests' / 'input' / 'other_sigs' / 'zstd.hdb',
+            testfile=testfile,
+        )
+        output = self.execute_command(command)
+
+        assert output.ec == 1  # virus
+
+        expected_results = [
+            'ZSTD_TEST_FILE.UNOFFICIAL FOUND',
+        ]
+        self.verify_output(output.out, expected=expected_results)
diff --git a/unit_tests/input/other_scanfiles/zstd/concat.txt.zst b/unit_tests/input/other_scanfiles/zstd/concat.txt.zst
new file mode 100644
index 0000000000..f8b6871f27
Binary files /dev/null and b/unit_tests/input/other_scanfiles/zstd/concat.txt.zst differ
diff --git a/unit_tests/input/other_scanfiles/zstd/testfile.txt.zst b/unit_tests/input/other_scanfiles/zstd/testfile.txt.zst
new file mode 100644
index 0000000000..0cbf31e512
Binary files /dev/null and b/unit_tests/input/other_scanfiles/zstd/testfile.txt.zst differ
diff --git a/unit_tests/input/other_sigs/zstd.hdb b/unit_tests/input/other_sigs/zstd.hdb
new file mode 100644
index 0000000000..e8e90c4351
--- /dev/null
+++ b/unit_tests/input/other_sigs/zstd.hdb
@@ -0,0 +1,2 @@
+1735a5732d057a5b57421ccd0a88e7049c342579:28:ZSTD_TEST_FILE
+cf327fba15169e4594083e3a03283ec6c52e835c:54:ZSTD_TEST_FILE