From 40eee14378cb5147322cda1df6be6a122addd856 Mon Sep 17 00:00:00 2001
From: Julian
Date: Mon, 16 Mar 2026 17:57:36 +0100
Subject: [PATCH 1/5] Add fuzz testing to JSON and EMF formatters

Structural validity fuzzing for both formatter backends.
---
 .github/workflows/fuzz.yml      |  81 +++
 .gitignore                      |   5 +
 fuzz/Cargo.lock                 | 969 ++++++++++++++++++++++++++++++++
 fuzz/Cargo.toml                 |  30 +
 fuzz/README.md                  |  49 ++
 fuzz/fuzz_targets/fuzz_emf.rs   | 257 +++++++++
 fuzz/fuzz_targets/fuzz_entry.rs | 394 +++++++++++++
 fuzz/fuzz_targets/fuzz_json.rs  |  99 ++++
 8 files changed, 1884 insertions(+)
 create mode 100644 .github/workflows/fuzz.yml
 create mode 100644 fuzz/Cargo.lock
 create mode 100644 fuzz/Cargo.toml
 create mode 100644 fuzz/README.md
 create mode 100644 fuzz/fuzz_targets/fuzz_emf.rs
 create mode 100644 fuzz/fuzz_targets/fuzz_entry.rs
 create mode 100644 fuzz/fuzz_targets/fuzz_json.rs

diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml
new file mode 100644
index 00000000..f1824857
--- /dev/null
+++ b/.github/workflows/fuzz.yml
@@ -0,0 +1,81 @@
+name: Fuzz
+on:
+  workflow_dispatch: # Allow manual triggers
+  schedule:
+    - cron: "0 6 * * *" # Every day at 06:00 UTC
+
+permissions:
+  contents: read
+
+jobs:
+  fuzz:
+    name: Nightly Fuzz
+    runs-on: ubuntu-latest
+    env:
+      RUST_BACKTRACE: "1"
+    steps:
+      - uses: actions/checkout@v4
+      - uses: dtolnay/rust-toolchain@nightly
+      - name: Restore Rust build cache
+        uses: Swatinem/rust-cache@v2
+        with:
+          shared-key: fuzz-nightly-${{ runner.os }}
+          workspaces: | # entries are "workspace -> target", with target relative to the workspace
+            . -> target
+            fuzz -> target
+      - name: Compute corpus cache bucket
+        id: corpus_bucket
+        run: |
+          echo "day=$(date -u +%F)" >> "$GITHUB_OUTPUT"
+          echo "week=$(date -u +%G-W%V)" >> "$GITHUB_OUTPUT"
+      - name: Restore fuzz corpus cache
+        id: corpus_restore
+        uses: actions/cache/restore@v4
+        with:
+          path: fuzz/corpus
+          # Policy:
+          # - one evolving cache lineage per branch per day
+          # - restore from same-day first, then same week, then branch, then OS-wide fallback
+          # - keep corpus in cache (not committed) to avoid repository bloat
+          key: fuzz-corpus-${{ runner.os }}-${{ github.ref_name }}-${{ steps.corpus_bucket.outputs.day }}-${{ github.run_id }}
+          restore-keys: |
+            fuzz-corpus-${{ runner.os }}-${{ github.ref_name }}-${{ steps.corpus_bucket.outputs.day }}-
+            fuzz-corpus-${{ runner.os }}-${{ github.ref_name }}-${{ steps.corpus_bucket.outputs.week }}-
+            fuzz-corpus-${{ runner.os }}-${{ github.ref_name }}-
+            fuzz-corpus-${{ runner.os }}-
+      - name: Ensure corpus directories exist
+        run: mkdir -p fuzz/corpus/fuzz_json fuzz/corpus/fuzz_emf
+      - name: Install cargo-fuzz
+        uses: dtolnay/install@cargo-fuzz
+      - name: Run fuzz_json
+        run: cargo +nightly fuzz run fuzz_json -- -max_total_time=300
+      - name: Run fuzz_emf
+        run: cargo +nightly fuzz run fuzz_emf -- -max_total_time=300
+      - name: Minimize corpus
+        if: always()
+        run: |
+          set -euo pipefail
+          for target in fuzz_json fuzz_emf; do
+            corpus_dir="fuzz/corpus/${target}"
+            if [ -d "$corpus_dir" ] && [ "$(ls -A "$corpus_dir" 2>/dev/null)" ]; then
+              echo "Minimizing corpus for ${target}..."
+              before=$(find "$corpus_dir" -type f | wc -l)
+              cargo +nightly fuzz cmin "$target" || true
+              after=$(find "$corpus_dir" -type f | wc -l)
+              echo "${target}: ${before} -> ${after} corpus entries"
+            else
+              echo "No corpus to minimize for ${target}"
+            fi
+          done
+      - name: Save fuzz corpus cache
+        if: always()
+        uses: actions/cache/save@v4
+        with:
+          path: fuzz/corpus
+          key: ${{ steps.corpus_restore.outputs.cache-primary-key }}
+      - name: Upload crash artifacts
+        if: failure()
+        uses: actions/upload-artifact@v4
+        with:
+          name: fuzz-artifacts
+          path: fuzz/artifacts/
diff --git a/.gitignore b/.gitignore
index cbd88be0..386a6bc0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,8 @@
 /target
 /metrique/my
 /metrique-service-metrics/my
+
+# Fuzzing
+fuzz/target
+fuzz/artifacts
+fuzz/corpus
diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock
new file mode 100644
index 00000000..e098ba72
--- /dev/null
+++ b/fuzz/Cargo.lock
@@ -0,0 +1,969 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 4 + +[[package]] +name = "ahash" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" +dependencies = [ + "cfg-if", + "getrandom", + "once_cell", + "version_check", + "zerocopy", +] + +[[package]] +name = "aho-corasick" +version = "1.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" +dependencies = [ + "memchr", +] + +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + +[[package]] +name = "arbitrary" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1" +dependencies = [ + "derive_arbitrary", +] + +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + +[[package]] +name = "bit-set" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08807e080ed7f9d5433fa9b275196cfc35414f66a0c79d864dc51a0d825231a3" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7" + +[[package]] +name = "bitflags" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843867be96c8daad0d758b57df9392b6d8d271134fce549de6ce169ff98a92af" + +[[package]] +name = "bumpalo" +version = "3.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" + +[[package]] +name = "cc" +version = "1.2.56" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aebf35691d1bfb0ac386a69bac2fde4dd276fb618cf8bf4f5318fe285e821bb2" +dependencies = [ + "find-msvc-tools", + "jobserver", + "libc", + "shlex", +] + +[[package]] +name = "cfg-if" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-queue" +version = "0.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "darling" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25ae13da2f202d56bd7f91c25fba009e7717a1e4a1cc98a76d844b65ae912e9d" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9865a50f7c335f53564bb694ef660825eb8610e0a53d3e11bf1b0d3df31e03b0" +dependencies = [ + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn", +] + +[[package]] +name = "darling_macro" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d" +dependencies 
= [ + "darling_core", + "quote", + "syn", +] + +[[package]] +name = "derive-where" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef941ded77d15ca19b40374869ac6000af1c9f2a4c0f3d4c70926287e6364a8f" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "derive_arbitrary" +version = "1.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e567bd82dcff979e4b03460c307b3cdc9e96fde3d73bed1496d2bc75d9dd62a" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "dtoa" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c3cf4824e2d5f025c7b531afcb2325364084a16806f6d47fbc1f5fbd9960590" + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "endian-type" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c34f04666d835ff5d62e058c3995147c06f42fe86ff053337632bca83e42702d" + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + +[[package]] +name = "find-msvc-tools" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" + +[[package]] +name = "foldhash" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" + +[[package]] +name = "getrandom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" +dependencies = [ + "cfg-if", + 
"libc", + "r-efi", + "wasip2", +] + +[[package]] +name = "hashbrown" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash", +] + +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + +[[package]] +name = "indexmap" +version = "2.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7714e70437a7dc3ac8eb7e6f8df75fd8eb422675fc7678aff7364301092b1017" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "itertools" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2" + +[[package]] +name = "jobserver" +version = "0.1.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" +dependencies = [ + "getrandom", + "libc", +] + +[[package]] +name = "js-sys" +version = "0.3.91" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b49715b7073f385ba4bc528e5747d02e66cb39c6146efb66b781f131f0fb399c" +dependencies = [ + "once_cell", + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + +[[package]] +name = "libc" +version = "0.2.183" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d" + +[[package]] +name = "libfuzzer-sys" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f12a681b7dd8ce12bff52488013ba614b869148d54dd79836ab85aafdd53f08d" +dependencies = [ + "arbitrary", + "cc", +] + +[[package]] +name = "log" +version = "0.4.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" + +[[package]] +name = "memchr" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" + +[[package]] +name = "metrics" +version = "0.24.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d5312e9ba3771cfa961b585728215e3d972c950a3eed9252aa093d6301277e8" +dependencies = [ + "ahash", + "portable-atomic", +] + +[[package]] +name = "metrics-util" +version = "0.20.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdfb1365fea27e6dd9dc1dbc19f570198bc86914533ad639dae939635f096be4" +dependencies = [ + "aho-corasick", + "crossbeam-epoch", + "crossbeam-utils", + "hashbrown", + "indexmap", + "metrics", + "ordered-float", + "quanta", + "radix_trie", + "rand", + "rand_xoshiro", + "sketches-ddsketch", +] + +[[package]] +name = "metrique-core" +version = "0.1.16" +dependencies = [ + "itertools", + "metrique-writer-core", +] + +[[package]] +name = "metrique-fuzz" +version = "0.0.0" +dependencies = [ + "arbitrary", + "libfuzzer-sys", + "metrique-writer-core", + "metrique-writer-format-emf", + "metrique-writer-format-json", + "serde_json", +] + +[[package]] +name = "metrique-writer" +version = "0.1.18" +dependencies = [ + "ahash", + "crossbeam-queue", + "crossbeam-utils", + "metrics", + "metrics-util", + "metrique-core", + "metrique-writer-core", + "metrique-writer-macro", + "rand", + "smallvec", + 
"tokio", + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "metrique-writer-core" +version = "0.1.13" +dependencies = [ + "derive-where", + "itertools", + "serde", + "smallvec", +] + +[[package]] +name = "metrique-writer-format-emf" +version = "0.1.17" +dependencies = [ + "bit-set", + "dtoa", + "hashbrown", + "itertools", + "itoa", + "metrique-writer", + "metrique-writer-core", + "rand", + "serde", + "serde_json", + "smallvec", + "tracing", +] + +[[package]] +name = "metrique-writer-format-json" +version = "0.1.0" +dependencies = [ + "dtoa", + "itoa", + "metrique-writer", + "metrique-writer-core", + "rand", +] + +[[package]] +name = "metrique-writer-macro" +version = "0.1.7" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "str_inflector", + "syn", + "synstructure", +] + +[[package]] +name = "nibble_vec" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a5d83df9f36fe23f0c3648c6bbb8b0298bb5f1939c8f2704431371f4b84d43" +dependencies = [ + "smallvec", +] + +[[package]] +name = "nu-ansi-term" +version = "0.50.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "once_cell" +version = "1.21.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" + +[[package]] +name = "ordered-float" +version = "5.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f4779c6901a562440c3786d08192c6fbda7c1c2060edd10006b05ee35d10f2d" +dependencies = [ + "num-traits", +] + +[[package]] +name = "pin-project-lite" 
+version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" + +[[package]] +name = "portable-atomic" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" + +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quanta" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3ab5a9d756f0d97bdc89019bd2e4ea098cf9cde50ee7564dde6b81ccc8f06c7" +dependencies = [ + "crossbeam-utils", + "libc", + "once_cell", + "raw-cpuid", + "wasi", + "web-sys", + "winapi", +] + +[[package]] +name = "quote" +version = "1.0.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + +[[package]] +name = "radix_trie" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c069c179fcdc6a2fe24d8d18305cf085fdbd4f922c041943e203685d6a1c58fd" +dependencies = [ + "endian-type", + "nibble_vec", +] + +[[package]] +name = "rand" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +dependencies = [ + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" +dependencies = [ + "getrandom", +] + +[[package]] +name = "rand_xoshiro" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f703f4665700daf5512dcca5f43afa6af89f09db47fb56be587f80636bda2d41" +dependencies = [ + "rand_core", +] + +[[package]] +name = "raw-cpuid" +version = "11.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "498cd0dc59d73224351ee52a95fee0f1a617a2eae0e7d9d720cc622c73a54186" +dependencies = [ + "bitflags", +] + +[[package]] +name = "regex" +version = "1.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" + +[[package]] +name = "rustversion" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" + +[[package]] +name = "serde" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" +dependencies = [ + "itoa", + "memchr", + "serde", + "serde_core", + "zmij", +] + +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + +[[package]] +name = "sketches-ddsketch" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c6f73aeb92d671e0cc4dca167e59b2deb6387c375391bc99ee743f326994a2b" + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "str_inflector" +version = 
"0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0b848d5a7695b33ad1be00f84a3c079fe85c9278a325ff9159e6c99cef4ef7" +dependencies = [ + "lazy_static", + "regex", +] + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "syn" +version = "2.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "synstructure" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "thread_local" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "tokio" +version = "1.50.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "27ad5e34374e03cfffefc301becb44e9dc3c17584f414349ebe29ed26661822d" +dependencies = [ + "pin-project-lite", +] + +[[package]] +name = "tracing" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" +dependencies = [ + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tracing-core" +version = 
"0.1.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" +dependencies = [ + "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f30143827ddab0d256fd843b7a66d164e9f271cfa0dde49142c5ca0ca291f1e" +dependencies = [ + "nu-ansi-term", + "sharded-slab", + "smallvec", + "thread_local", + "tracing-core", + "tracing-log", +] + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "valuable" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" + +[[package]] +name = "version_check" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" + +[[package]] +name = "wasi" +version = "0.11.1+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" + +[[package]] +name = "wasip2" +version = "1.0.2+wasi-0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9517f9239f02c069db75e65f174b3da828fe5f5b945c4dd26bd25d89c03ebcf5" +dependencies = [ + "wit-bindgen", +] + +[[package]] +name = "wasm-bindgen" +version = "0.2.114" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "6532f9a5c1ece3798cb1c2cfdba640b9b3ba884f5db45973a6f442510a87d38e" +dependencies = [ + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.114" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18a2d50fcf105fb33bb15f00e7a77b772945a2ee45dcf454961fd843e74c18e6" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.114" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03ce4caeaac547cdf713d280eda22a730824dd11e6b8c3ca9e42247b25c631e3" +dependencies = [ + "bumpalo", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.114" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75a326b8c223ee17883a4251907455a2431acc2791c98c26279376490c378c16" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "web-sys" +version = "0.3.91" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "854ba17bb104abfb26ba36da9729addc7ce7f06f5c0f90f3c391f8461cca21f9" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + 
+[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] + +[[package]] +name = "wit-bindgen" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" + +[[package]] +name = "zerocopy" +version = "0.8.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2578b716f8a7a858b7f02d5bd870c14bf4ddbbcf3a4c05414ba6503640505e3" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.42" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e6cc098ea4d3bd6246687de65af3f920c430e236bee1e3bf2e441463f08a02f" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml new file mode 100644 index 00000000..e533136e --- /dev/null +++ b/fuzz/Cargo.toml @@ -0,0 +1,30 @@ +[package] +name = "metrique-fuzz" +version = "0.0.0" +publish = false +edition = "2024" + +[package.metadata] +cargo-fuzz = true + +[dependencies] +arbitrary = { version = "1", features = ["derive"] } +libfuzzer-sys = "0.4" +metrique-writer-core = { path = "../metrique-writer-core" } +metrique-writer-format-json = { path = "../metrique-writer-format-json" } +metrique-writer-format-emf = { path = "../metrique-writer-format-emf" } +serde_json = "1" + +# Prevent this from interfering with workspaces 
+[workspace]
+members = ["."]
+
+[[bin]]
+name = "fuzz_json"
+path = "fuzz_targets/fuzz_json.rs"
+doc = false
+
+[[bin]]
+name = "fuzz_emf"
+path = "fuzz_targets/fuzz_emf.rs"
+doc = false
diff --git a/fuzz/README.md b/fuzz/README.md
new file mode 100644
index 00000000..1e4c030d
--- /dev/null
+++ b/fuzz/README.md
@@ -0,0 +1,49 @@
+# Fuzzing
+
+Uses [`cargo-fuzz`](https://github.com/rust-fuzz/cargo-fuzz) with libFuzzer to stress formatter invariants.
+
+## Targets
+
+- `fuzz_json`: If formatting returns `Ok(())`, output must be exactly one valid, newline-terminated JSON object. Tests both regular and sampled paths.
+- `fuzz_emf`: If formatting returns `Ok(())`, each emitted line must be a valid JSON object. Tests both regular and sampled paths with EMF-specific flag modes.
+
+Both targets format two entries through the same formatter instance to exercise state reuse. Additional semantic invariants may be added in the future.
+
+## Run Locally
+
+Requires Rust nightly and `cargo-fuzz` (`cargo install cargo-fuzz`).
+
+```bash
+cargo +nightly fuzz run fuzz_json -- -max_total_time=60
+cargo +nightly fuzz run fuzz_emf -- -max_total_time=60
+```
+
+## Reproduce a Crash
+
+```bash
+cargo +nightly fuzz run fuzz_json fuzz/artifacts/fuzz_json/<crash-file>
+```
+
+Then fix the bug, add a deterministic regression test, and delete the reproducer.
+
+## Corpus
+
+`fuzz/corpus` is git-ignored. Do not commit the evolving corpus.
+
+Minimize locally with:
+
+```bash
+cargo +nightly fuzz cmin fuzz_json
+cargo +nightly fuzz cmin fuzz_emf
+```
+
+## CI
+
+Nightly GitHub Actions workflow:
+
+1. Restores corpus from cache
+2. Runs both targets (5 min each)
+3. Minimizes corpus via `cargo fuzz cmin`
+4. Saves corpus back to cache
+
+Corpus cache uses branch-scoped daily buckets with weekly, branch, and OS-wide fallbacks, so coverage accumulates across runs without committing corpus files.
diff --git a/fuzz/fuzz_targets/fuzz_emf.rs b/fuzz/fuzz_targets/fuzz_emf.rs
new file mode 100644
index 00000000..a3128128
--- /dev/null
+++ b/fuzz/fuzz_targets/fuzz_emf.rs
@@ -0,0 +1,257 @@
+//! Fuzz target for the EMF (Embedded Metric Format) formatter.
+//!
+//! Invariants tested:
+//! - Successful formatting always produces one or more valid, newline-delimited JSON objects.
+//! - Formatter state reuse across entries does not corrupt output.
+//! - Both regular and sampled paths are exercised, with EMF-specific flag modes
+//!   (HighStorageResolution, NoMetric) applied to metrics.
+
+#![no_main]
+
+mod fuzz_entry;
+
+use arbitrary::Unstructured;
+use libfuzzer_sys::fuzz_target;
+
+use metrique_writer_core::format::Format;
+use metrique_writer_core::sample::SampledFormat;
+use metrique_writer_core::{Entry, EntryWriter};
+use metrique_writer_format_emf::{Emf, HighStorageResolution, NoMetric};
+
+use fuzz_entry::{
+    FuzzEntry, FuzzField, FuzzMetricValue, arbitrary_sample_rate, arbitrary_string,
+};
+
+/// EMF-specific flag mode applied on top of base fuzz entries.
+#[derive(Debug, Clone, Copy)]
+enum FuzzMetricFlagMode {
+    None,
+    HighStorageResolution,
+    NoMetric,
+    HighThenNoMetric,
+    NoMetricThenHigh,
+}
+
+impl<'a> arbitrary::Arbitrary<'a> for FuzzMetricFlagMode {
+    fn arbitrary(u: &mut Unstructured<'a>) -> arbitrary::Result<Self> {
+        let tag: u8 = u.arbitrary()?; // one input byte selects the mode
+        Ok(match tag % 5 {
+            0 => FuzzMetricFlagMode::None,
+            1 => FuzzMetricFlagMode::HighStorageResolution,
+            2 => FuzzMetricFlagMode::NoMetric,
+            3 => FuzzMetricFlagMode::HighThenNoMetric,
+            _ => FuzzMetricFlagMode::NoMetricThenHigh,
+        })
+    }
+}
+
+/// Wrapper around `FuzzEntry` that applies EMF-specific flag modes to metrics.
+#[derive(Debug)]
+struct EmfFuzzEntry {
+    inner: FuzzEntry,
+    /// One flag mode per metric field. Non-metric fields use index but ignore the flag.
+    flag_modes: Vec<FuzzMetricFlagMode>,
+}
+
+impl Entry for EmfFuzzEntry {
+    fn write<'a>(&'a self, writer: &mut impl EntryWriter<'a>) {
+        // Delegate config and timestamps to the base entry's logic,
+        // but handle fields ourselves to apply EMF flags.
+        if self.inner.allow_split_entries {
+            writer.config(&const { metrique_writer_core::config::AllowSplitEntries::new() });
+        }
+        if let Some(entry_dimensions) = &self.inner.entry_dimensions {
+            writer.config(entry_dimensions);
+        }
+        for timestamp in &self.inner.timestamps {
+            writer.timestamp(timestamp.to_system_time());
+        }
+        for (i, field) in self.inner.fields.iter().enumerate() {
+            let flag_mode = self
+                .flag_modes
+                .get(i)
+                .copied()
+                .unwrap_or(FuzzMetricFlagMode::None); // fewer modes than fields: fall back to no flag
+            match field {
+                FuzzField::StringProperty { name, value } => {
+                    writer.value(name.as_str(), &value.as_str());
+                }
+                FuzzField::Metric {
+                    name,
+                    observations,
+                    dimensions,
+                    unit,
+                } => {
+                    let metric = FuzzMetricValue {
+                        observations,
+                        dimensions,
+                        unit: *unit,
+                    };
+                    match flag_mode {
+                        FuzzMetricFlagMode::None => writer.value(name.as_str(), &metric),
+                        FuzzMetricFlagMode::HighStorageResolution => {
+                            writer.value(name.as_str(), &HighStorageResolution::from(metric));
+                        }
+                        FuzzMetricFlagMode::NoMetric => {
+                            writer.value(name.as_str(), &NoMetric::from(metric));
+                        }
+                        FuzzMetricFlagMode::HighThenNoMetric => {
+                            writer.value(
+                                name.as_str(),
+                                &NoMetric::from(HighStorageResolution::from(metric)),
+                            );
+                        }
+                        FuzzMetricFlagMode::NoMetricThenHigh => {
+                            writer.value(
+                                name.as_str(),
+                                &HighStorageResolution::from(NoMetric::from(metric)),
+                            );
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+/// EMF can produce multiple newline-delimited JSON documents (split entries).
+fn assert_valid_json_lines(output: &[u8], context: &str) { + let mut saw_document = false; + for line in output.split(|&b| b == b'\n') { + if line.is_empty() { + continue; + } + saw_document = true; + let parsed = serde_json::from_slice::(line).unwrap_or_else(|_| { + panic!( + "EMF produced invalid JSON ({context}): {}", + String::from_utf8_lossy(line), + ) + }); + assert!( + parsed.is_object(), + "EMF produced non-object JSON ({context}): {}", + String::from_utf8_lossy(line), + ); + } + assert!( + saw_document, + "EMF returned success but emitted no JSON documents ({context})", + ); +} + +#[derive(Debug)] +struct FuzzEmfConfig { + namespace: String, + default_dimensions: Vec>, + extra_namespace: Option, + log_group_name: Option, + allow_ignored_dimensions: bool, + sample_rate_a: f32, + sample_rate_b: f32, +} + +impl<'a> arbitrary::Arbitrary<'a> for FuzzEmfConfig { + fn arbitrary(u: &mut Unstructured<'a>) -> arbitrary::Result { + let namespace = arbitrary_string(u, 48)?; + let extra_namespace = if u.arbitrary::()? { + Some(arbitrary_string(u, 48)?) + } else { + None + }; + let log_group_name = if u.arbitrary::()? { + Some(arbitrary_string(u, 64)?) + } else { + None + }; + + // Keep at least one default dimension set to match common EMF setup. + let set_count = (u.arbitrary::()? % 4) + 1; + let mut default_dimensions = Vec::with_capacity(set_count as usize); + for _ in 0..set_count { + let dim_count = u.arbitrary::()? 
% 5; + let mut dims = Vec::with_capacity(dim_count as usize); + for _ in 0..dim_count { + dims.push(arbitrary_string(u, 32)?); + } + default_dimensions.push(dims); + } + + Ok(Self { + namespace, + default_dimensions, + extra_namespace, + log_group_name, + allow_ignored_dimensions: u.arbitrary()?, + sample_rate_a: arbitrary_sample_rate(u)?, + sample_rate_b: arbitrary_sample_rate(u)?, + }) + } +} + +fn build_emf(config: &FuzzEmfConfig) -> Emf { + let mut builder = Emf::builder(config.namespace.clone(), config.default_dimensions.clone()) + .allow_ignored_dimensions(config.allow_ignored_dimensions); + if let Some(extra) = &config.extra_namespace { + builder = builder.add_namespace(extra.clone()); + } + if let Some(log_group_name) = &config.log_group_name { + builder = builder.log_group_name(log_group_name.clone()); + } + builder.build() +} + +fuzz_target!(|data: &[u8]| { + let mut u = Unstructured::new(data); + let Ok((entry_a, entry_b, config)) = u.arbitrary::<(FuzzEntry, FuzzEntry, FuzzEmfConfig)>() + else { + return; + }; + + // Generate flag modes for each entry's fields. + let flags_a: Vec = (0..entry_a.fields.len()) + .map(|_| u.arbitrary().unwrap_or(FuzzMetricFlagMode::None)) + .collect(); + let flags_b: Vec = (0..entry_b.fields.len()) + .map(|_| u.arbitrary().unwrap_or(FuzzMetricFlagMode::None)) + .collect(); + + let emf_entry_a = EmfFuzzEntry { + inner: entry_a, + flag_modes: flags_a, + }; + let emf_entry_b = EmfFuzzEntry { + inner: entry_b, + flag_modes: flags_b, + }; + + // Regular EMF path. + let mut format = build_emf(&config); + let mut output = Vec::new(); + + let result = format.format(&emf_entry_a, &mut output); + + if let Ok(()) = result { + assert_valid_json_lines(&output, "first call"); + } + + // Test formatter state reuse with a different entry. + output.clear(); + let result = format.format(&emf_entry_b, &mut output); + if let Ok(()) = result { + assert_valid_json_lines(&output, "state reuse call"); + } + + // Sampled EMF path. 
+ let mut sampled = build_emf(&config).with_sampling(); + output.clear(); + let result = sampled.format_with_sample_rate(&emf_entry_a, &mut output, config.sample_rate_a); + if let Ok(()) = result { + assert_valid_json_lines(&output, "sampled first call"); + } + output.clear(); + let result = sampled.format_with_sample_rate(&emf_entry_b, &mut output, config.sample_rate_b); + if let Ok(()) = result { + assert_valid_json_lines(&output, "sampled state reuse call"); + } +}); diff --git a/fuzz/fuzz_targets/fuzz_entry.rs b/fuzz/fuzz_targets/fuzz_entry.rs new file mode 100644 index 00000000..4ce92b60 --- /dev/null +++ b/fuzz/fuzz_targets/fuzz_entry.rs @@ -0,0 +1,394 @@ +//! Shared fuzz entry types used by all formatter fuzz targets. + +use std::borrow::Cow; +use std::time::{Duration, SystemTime}; + +use arbitrary::{Arbitrary, Unstructured}; + +use metrique_writer_core::{ + config::{AllowSplitEntries, EntryDimensions}, + unit::{NegativeScale, PositiveScale}, + Entry, EntryWriter, MetricFlags, Observation, Unit, ValueWriter, +}; + +// Configuration values used to control the frequency of different types of fields and observations. +/// Percentage of entries to omit entry dimensions. +const OMIT_ENTRY_DIMENSIONS_PERCENT: u8 = 55; +/// Percentage of fields to reuse existing name. +const REUSE_EXISTING_NAME_PERCENT: u8 = 70; +/// Percentage of fields to replace existing name in the pool. +const REPLACE_EXISTING_POOL_NAME_PERCENT: u8 = 20; + +/// A single field in our fuzzed entry. 
+#[derive(Debug)] +pub enum FuzzField { + /// A string property like `writer.value("key", &"some string")` + StringProperty { name: String, value: String }, + /// A metric with one or more observations + Metric { + name: String, + observations: Vec, + dimensions: Vec<(String, String)>, + unit: Unit, + }, +} + +#[derive(Debug)] +pub enum FuzzObservation { + Unsigned(u64), + Floating(f64), + Repeated { total: f64, occurrences: u64 }, +} + +impl FuzzObservation { + pub fn to_observation(&self) -> Observation { + match *self { + FuzzObservation::Unsigned(v) => Observation::Unsigned(v), + FuzzObservation::Floating(v) => Observation::Floating(v), + FuzzObservation::Repeated { total, occurrences } => { + Observation::Repeated { total, occurrences } + } + } + } +} + +impl<'a> Arbitrary<'a> for FuzzObservation { + fn arbitrary(u: &mut Unstructured<'a>) -> arbitrary::Result { + let tag: u8 = u.arbitrary()?; + match tag % 4 { + 0 => Ok(FuzzObservation::Unsigned(u.arbitrary()?)), + 1 => Ok(FuzzObservation::Floating(arbitrary_f64(u)?)), + _ => Ok(FuzzObservation::Repeated { + total: arbitrary_f64(u)?, + // Keep this edge case frequent: repeated with 0 count. + occurrences: if u.arbitrary::()? { + 0 + } else { + u.arbitrary()? 
+ }, + }), + } + } +} + +pub fn arbitrary_unit<'a>(u: &mut Unstructured<'a>) -> arbitrary::Result { + let tag: u8 = u.arbitrary()?; + Ok(match tag % 11 { + 0 => Unit::None, + 1 => Unit::Count, + 2 => Unit::Percent, + 3 => Unit::Second(NegativeScale::Micro), + 4 => Unit::Second(NegativeScale::Milli), + 5 => Unit::Second(NegativeScale::One), + 6 => Unit::Byte(PositiveScale::One), + 7 => Unit::Byte(PositiveScale::Kilo), + 8 => Unit::Byte(PositiveScale::Mega), + 9 => Unit::Bit(PositiveScale::One), + _ => Unit::Bit(PositiveScale::Kilo), + }) +} + +#[derive(Debug, Arbitrary)] +pub struct FuzzTimestamp { + pub before_epoch: bool, + pub secs: u64, +} + +impl FuzzTimestamp { + pub fn to_system_time(&self) -> SystemTime { + // Keep values bounded to avoid pathological durations. + let secs = self.secs % (365 * 500 * 24 * 3600); + let duration = Duration::from_secs(secs); + if self.before_epoch { + SystemTime::UNIX_EPOCH + .checked_sub(duration) + .unwrap_or(SystemTime::UNIX_EPOCH) + } else { + SystemTime::UNIX_EPOCH + duration + } + } +} + +/// Fuzzed entry that exercises the full `EntryWriter` interface. +/// +/// This is a format-agnostic entry: it writes metrics directly without +/// format-specific wrappers (like EMF flags). Format-specific fuzz targets +/// can wrap this to add their own behavior. 
+#[derive(Debug)] +pub struct FuzzEntry { + pub timestamps: Vec, + pub allow_split_entries: bool, + pub entry_dimensions: Option, + pub fields: Vec, +} + +impl Entry for FuzzEntry { + fn write<'a>(&'a self, writer: &mut impl EntryWriter<'a>) { + if self.allow_split_entries { + writer.config(&const { AllowSplitEntries::new() }); + } + if let Some(entry_dimensions) = &self.entry_dimensions { + writer.config(entry_dimensions); + } + for timestamp in &self.timestamps { + writer.timestamp(timestamp.to_system_time()); + } + for field in &self.fields { + match field { + FuzzField::StringProperty { name, value } => { + writer.value(name.as_str(), &value.as_str()); + } + FuzzField::Metric { + name, + observations, + dimensions, + unit, + } => { + let metric = FuzzMetricValue { + observations, + dimensions, + unit: *unit, + }; + writer.value(name.as_str(), &metric); + } + } + } + } +} + +pub struct FuzzMetricValue<'a> { + pub observations: &'a [FuzzObservation], + pub dimensions: &'a [(String, String)], + pub unit: Unit, +} + +impl metrique_writer_core::value::Value for FuzzMetricValue<'_> { + fn write(&self, writer: impl ValueWriter) { + writer.metric( + self.observations + .iter() + .map(FuzzObservation::to_observation), + self.unit, + self.dimensions + .iter() + .map(|(key, value)| (key.as_str(), value.as_str())), + MetricFlags::empty(), + ); + } +} + +impl<'a> Arbitrary<'a> for FuzzEntry { + fn arbitrary(u: &mut Unstructured<'a>) -> arbitrary::Result { + let timestamp_count = match u.arbitrary::()? % 8 { + 0 => 0, + 1 | 2 | 3 | 4 => 1, + 5 => 2, + 6 => 3, + _ => 4, + }; + let timestamps: Vec = (0..timestamp_count) + .map(|_| u.arbitrary()) + .collect::>()?; + let allow_split_entries: bool = u.arbitrary()?; + + let entry_dimensions = if chance_percent(u, OMIT_ENTRY_DIMENSIONS_PERCENT)? { + None + } else { + Some(arbitrary_entry_dimensions(u)?) + }; + + // 0-24 fields per entry, with occasional larger cases. + let field_count = match u.arbitrary::()? 
% 8 { + 0 => 0, + 1 => 1, + 2 => 2, + 3 => 4, + 4 => 8, + 5 => 12, + 6 => 16, + _ => 24, + }; + + let mut name_pool = Vec::new(); + let fields: Vec = (0..field_count) + .map(|_| arbitrary_field(u, &mut name_pool)) + .collect::>()?; + + Ok(FuzzEntry { + timestamps, + allow_split_entries, + entry_dimensions, + fields, + }) + } +} + +fn arbitrary_field<'a>( + u: &mut Unstructured<'a>, + name_pool: &mut Vec, +) -> arbitrary::Result { + let is_string: bool = u.arbitrary()?; + let reuse_name = !name_pool.is_empty() && chance_percent(u, REUSE_EXISTING_NAME_PERCENT)?; + let name = if reuse_name { + let idx = choose_index(u, name_pool.len())?; + name_pool[idx].clone() + } else { + let name = arbitrary_string(u, 96)?; + if !name_pool.is_empty() && chance_percent(u, REPLACE_EXISTING_POOL_NAME_PERCENT)? { + // Keep pool bounded and still churn names. + let idx = choose_index(u, name_pool.len())?; + name_pool[idx] = name.clone(); + } else if name_pool.len() < 16 { + name_pool.push(name.clone()); + } + name + }; + + if is_string { + let value = arbitrary_string(u, 192)?; + Ok(FuzzField::StringProperty { name, value }) + } else { + let obs_count = match u.arbitrary::()? % 8 { + 0 => 0, + 1 => 1, + 2 => 2, + 3 => 3, + 4 => 4, + 5 => 8, + 6 => 16, + _ => 24, + }; + let observations: Vec = (0..obs_count) + .map(|_| u.arbitrary()) + .collect::>()?; + + let dim_count = match u.arbitrary::()? % 6 { + 0 => 0, + 1 => 1, + 2 => 2, + 3 => 3, + 4 => 6, + _ => 12, + }; + let dimensions: Vec<(String, String)> = (0..dim_count) + .map(|_| Ok((arbitrary_string(u, 48)?, arbitrary_string(u, 64)?))) + .collect::>()?; + + Ok(FuzzField::Metric { + name, + observations, + dimensions, + unit: arbitrary_unit(u)?, + }) + } +} + +pub fn arbitrary_entry_dimensions<'a>( + u: &mut Unstructured<'a>, +) -> arbitrary::Result { + let set_count = match u.arbitrary::()? 
% 6 { + 0 => 0, + 1 => 1, + 2 => 2, + 3 => 3, + 4 => 4, + _ => 6, + }; + let mut sets: Vec]>> = Vec::with_capacity(set_count); + for _ in 0..set_count { + let dim_count = match u.arbitrary::()? % 6 { + 0 => 0, + 1 => 1, + 2 => 2, + 3 => 3, + 4 => 4, + _ => 6, + }; + let mut dims: Vec> = Vec::with_capacity(dim_count); + for _ in 0..dim_count { + dims.push(Cow::Owned(arbitrary_string(u, 48)?)); + } + sets.push(Cow::Owned(dims)); + } + Ok(EntryDimensions::new(Cow::Owned(sets))) +} + +pub fn arbitrary_string<'a>(u: &mut Unstructured<'a>, max_len: usize) -> arbitrary::Result { + let len = (u.arbitrary::()? as usize).min(max_len); + let mut s = String::with_capacity(len); + for _ in 0..len { + s.push(arbitrary_char(u)?); + } + Ok(s) +} + +pub fn arbitrary_char<'a>(u: &mut Unstructured<'a>) -> arbitrary::Result { + const JSON_ESCAPES: [char; 6] = ['"', '\\', '\n', '\r', '\t', '\u{08}']; + const DELIMS: [char; 6] = ['{', '}', '[', ']', ':', ',']; + let bucket: u8 = u.arbitrary()?; + match bucket % 10 { + 0 => { + let idx = (u.arbitrary::()? as usize) % JSON_ESCAPES.len(); + Ok(JSON_ESCAPES[idx]) + } + 1 => { + let control = u.int_in_range(0..=0x1f)?; + Ok(char::from(control)) + } + 2 => { + let idx = (u.arbitrary::()? as usize) % DELIMS.len(); + Ok(DELIMS[idx]) + } + 3 => Ok(if u.arbitrary::()? 
{ ' ' } else { '\n' }), + _ => u.arbitrary::(), + } +} + +pub fn arbitrary_f64<'a>(u: &mut Unstructured<'a>) -> arbitrary::Result { + let choice: u8 = u.arbitrary()?; + Ok(match choice % 12 { + 0 => f64::NAN, + 1 => f64::INFINITY, + 2 => f64::NEG_INFINITY, + 3 => -0.0, + 4 => 0.0, + 5 => f64::MAX, + 6 => f64::MIN, + 7 => f64::MIN_POSITIVE, + 8 => f64::from_bits(1), + _ => f64::from_bits(u.arbitrary()?), + }) +} + +pub fn chance_percent<'a>(u: &mut Unstructured<'a>, percent: u8) -> arbitrary::Result { + debug_assert!(percent <= 100); + if percent == 0 { + return Ok(false); + } + if percent == 100 { + return Ok(true); + } + let roll: u8 = u.int_in_range(0..=99)?; + Ok(roll < percent) +} + +pub fn choose_index<'a>(u: &mut Unstructured<'a>, len: usize) -> arbitrary::Result { + debug_assert!(len > 0); + u.int_in_range(0..=len - 1) +} + +/// Generate a fuzzed sample rate with bias toward edge cases. +pub fn arbitrary_sample_rate<'a>(u: &mut Unstructured<'a>) -> arbitrary::Result { + let selector: u8 = u.arbitrary()?; + Ok(match selector % 10 { + 0 => f32::NAN, + 1 => 0.0, + 2 => -1.0, + 3 => f32::INFINITY, + 4 => 1.0, + 5 => 0.5, + 6 => 0.001, + 7 => 1e-30, + _ => f32::from_bits(u.arbitrary()?), + }) +} diff --git a/fuzz/fuzz_targets/fuzz_json.rs b/fuzz/fuzz_targets/fuzz_json.rs new file mode 100644 index 00000000..19fe524d --- /dev/null +++ b/fuzz/fuzz_targets/fuzz_json.rs @@ -0,0 +1,99 @@ +//! Fuzz target for the pure JSON formatter. +//! +//! Invariants tested: +//! - Successful formatting always produces exactly one valid, newline-terminated JSON object. +//! - Formatter state reuse across entries does not corrupt output. +//! - Both regular and sampled paths are exercised. 
+ +#![no_main] + +mod fuzz_entry; + +use arbitrary::Unstructured; +use libfuzzer_sys::fuzz_target; + +use metrique_writer_core::format::Format; +use metrique_writer_core::sample::SampledFormat; +use metrique_writer_format_json::Json; + +use fuzz_entry::{FuzzEntry, arbitrary_sample_rate}; + +/// Assert that output is exactly one newline-terminated JSON object. +/// +/// The JSON formatter documents single-line output: one JSON object followed by `\n`. +fn assert_valid_json_line(output: &[u8], context: &str) { + assert!( + output.ends_with(b"\n"), + "JSON output must end with newline ({context}): {:?}", + String::from_utf8_lossy(output), + ); + + // Strip the trailing newline; the remainder must contain no newlines. + let body = &output[..output.len() - 1]; + assert!( + !body.contains(&b'\n'), + "JSON output must be a single line ({context}): {:?}", + String::from_utf8_lossy(output), + ); + + let parsed = serde_json::from_slice::(body).unwrap_or_else(|_| { + panic!( + "JSON formatter produced invalid JSON ({context}): {}", + String::from_utf8_lossy(output), + ) + }); + assert!( + parsed.is_object(), + "JSON formatter produced non-object JSON ({context}): {}", + String::from_utf8_lossy(output), + ); +} + +fuzz_target!(|data: &[u8]| { + let mut u = Unstructured::new(data); + let Ok((entry_a, entry_b)) = + u.arbitrary::<(FuzzEntry, FuzzEntry)>() + else { + return; + }; + + // Regular (non-sampled) path + let mut format = Json::new(); + let mut output = Vec::new(); + + // Format the entry, we don't care if it returns a validation error, + // but it must never panic. + let result = format.format(&entry_a, &mut output); + + if let Ok(()) = result { + assert_valid_json_line(&output, "first call"); + } + + // Format a different entry through the same formatter to test state reuse. 
+ output.clear(); + let result = format.format(&entry_b, &mut output); + if let Ok(()) = result { + assert_valid_json_line(&output, "state reuse call"); + } + + // Sampled path + let Ok(rate_a) = arbitrary_sample_rate(&mut u) else { + return; + }; + let Ok(rate_b) = arbitrary_sample_rate(&mut u) else { + return; + }; + + let mut sampled = Json::new().with_sampling(); + output.clear(); + let result = sampled.format_with_sample_rate(&entry_a, &mut output, rate_a); + if let Ok(()) = result { + assert_valid_json_line(&output, "sampled first call"); + } + + output.clear(); + let result = sampled.format_with_sample_rate(&entry_b, &mut output, rate_b); + if let Ok(()) = result { + assert_valid_json_line(&output, "sampled state reuse call"); + } +}); From c0bd6a03e894567020434c4f7701ccd3c9b84964 Mon Sep 17 00:00:00 2001 From: Julian Date: Mon, 16 Mar 2026 18:29:01 +0100 Subject: [PATCH 2/5] Add 1-minute fuzz run on PRs --- .github/workflows/fuzz.yml | 36 ++++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml index f1824857..b2d7e99c 100644 --- a/.github/workflows/fuzz.yml +++ b/.github/workflows/fuzz.yml @@ -1,6 +1,7 @@ name: Fuzz on: - workflow_dispatch: # Allow manual triggers + pull_request: + workflow_dispatch: schedule: - cron: "0 6 * * *" # Every day at 06:00 UTC @@ -8,8 +9,39 @@ permissions: contents: read jobs: - fuzz: + # Quick smoke test on PRs, should catch obvious regressions. + fuzz-pr: + name: Fuzz Smoke Test + if: github.event_name == 'pull_request' + runs-on: ubuntu-latest + env: + RUST_BACKTRACE: 1 + steps: + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@nightly + - uses: Swatinem/rust-cache@v2 + with: + shared-key: fuzz-pr-${{ runner.os }} + workspaces: | + . 
-> target + fuzz -> fuzz/target + - name: Install cargo-fuzz + uses: dtolnay/install@cargo-fuzz + - name: Run fuzz_json + run: cargo +nightly fuzz run fuzz_json -- -max_total_time=60 + - name: Run fuzz_emf + run: cargo +nightly fuzz run fuzz_emf -- -max_total_time=60 + - name: Upload crash artifacts + if: failure() + uses: actions/upload-artifact@v4 + with: + name: fuzz-artifacts-pr + path: fuzz/artifacts/ + + # Longer nightly run with corpus caching. + fuzz-nightly: name: Nightly Fuzz + if: github.event_name != 'pull_request' runs-on: ubuntu-latest env: RUST_BACKTRACE: 1 From 66ee8ad7c8bebea9dd2638888ea50b558b64fc70 Mon Sep 17 00:00:00 2001 From: Julian Date: Mon, 16 Mar 2026 21:08:52 +0100 Subject: [PATCH 3/5] Use Vec of entries instead of hardcoded pairs in fuzz targets --- fuzz/fuzz_targets/fuzz_emf.rs | 80 +++++++++++++++------------------- fuzz/fuzz_targets/fuzz_json.rs | 66 ++++++++++++---------------- 2 files changed, 64 insertions(+), 82 deletions(-) diff --git a/fuzz/fuzz_targets/fuzz_emf.rs b/fuzz/fuzz_targets/fuzz_emf.rs index a3128128..b9d84ca2 100644 --- a/fuzz/fuzz_targets/fuzz_emf.rs +++ b/fuzz/fuzz_targets/fuzz_emf.rs @@ -147,8 +147,6 @@ struct FuzzEmfConfig { extra_namespace: Option, log_group_name: Option, allow_ignored_dimensions: bool, - sample_rate_a: f32, - sample_rate_b: f32, } impl<'a> arbitrary::Arbitrary<'a> for FuzzEmfConfig { @@ -183,8 +181,6 @@ impl<'a> arbitrary::Arbitrary<'a> for FuzzEmfConfig { extra_namespace, log_group_name, allow_ignored_dimensions: u.arbitrary()?, - sample_rate_a: arbitrary_sample_rate(u)?, - sample_rate_b: arbitrary_sample_rate(u)?, }) } } @@ -203,55 +199,49 @@ fn build_emf(config: &FuzzEmfConfig) -> Emf { fuzz_target!(|data: &[u8]| { let mut u = Unstructured::new(data); - let Ok((entry_a, entry_b, config)) = u.arbitrary::<(FuzzEntry, FuzzEntry, FuzzEmfConfig)>() - else { + let Ok(config) = u.arbitrary::() else { return; }; - - // Generate flag modes for each entry's fields. 
- let flags_a: Vec = (0..entry_a.fields.len()) - .map(|_| u.arbitrary().unwrap_or(FuzzMetricFlagMode::None)) - .collect(); - let flags_b: Vec = (0..entry_b.fields.len()) - .map(|_| u.arbitrary().unwrap_or(FuzzMetricFlagMode::None)) - .collect(); - - let emf_entry_a = EmfFuzzEntry { - inner: entry_a, - flag_modes: flags_a, - }; - let emf_entry_b = EmfFuzzEntry { - inner: entry_b, - flag_modes: flags_b, + // 1–4 entries to format through the same formatter instance. + let entry_count = match u.arbitrary::() { + Ok(n) => (n % 4) as usize + 1, + Err(_) => return, }; + let mut entries = Vec::with_capacity(entry_count); + for _ in 0..entry_count { + let Ok(entry) = u.arbitrary::() else { + return; + }; + let flags: Vec = (0..entry.fields.len()) + .map(|_| u.arbitrary().unwrap_or(FuzzMetricFlagMode::None)) + .collect(); + entries.push(EmfFuzzEntry { + inner: entry, + flag_modes: flags, + }); + } - // Regular EMF path. + // Regular EMF path, format all entries through the same formatter. let mut format = build_emf(&config); let mut output = Vec::new(); - - let result = format.format(&emf_entry_a, &mut output); - - if let Ok(()) = result { - assert_valid_json_lines(&output, "first call"); - } - - // Test formatter state reuse with a different entry. - output.clear(); - let result = format.format(&emf_entry_b, &mut output); - if let Ok(()) = result { - assert_valid_json_lines(&output, "state reuse call"); + for (i, entry) in entries.iter().enumerate() { + output.clear(); + let result = format.format(entry, &mut output); + if let Ok(()) = result { + assert_valid_json_lines(&output, &format!("entry {i}")); + } } - // Sampled EMF path. + // Sampled EMF path, same entries, fresh formatter. 
let mut sampled = build_emf(&config).with_sampling(); - output.clear(); - let result = sampled.format_with_sample_rate(&emf_entry_a, &mut output, config.sample_rate_a); - if let Ok(()) = result { - assert_valid_json_lines(&output, "sampled first call"); - } - output.clear(); - let result = sampled.format_with_sample_rate(&emf_entry_b, &mut output, config.sample_rate_b); - if let Ok(()) = result { - assert_valid_json_lines(&output, "sampled state reuse call"); + for (i, entry) in entries.iter().enumerate() { + let Ok(rate) = arbitrary_sample_rate(&mut u) else { + return; + }; + output.clear(); + let result = sampled.format_with_sample_rate(entry, &mut output, rate); + if let Ok(()) = result { + assert_valid_json_lines(&output, &format!("sampled entry {i}")); + } } }); diff --git a/fuzz/fuzz_targets/fuzz_json.rs b/fuzz/fuzz_targets/fuzz_json.rs index 19fe524d..bc74b49b 100644 --- a/fuzz/fuzz_targets/fuzz_json.rs +++ b/fuzz/fuzz_targets/fuzz_json.rs @@ -51,49 +51,41 @@ fn assert_valid_json_line(output: &[u8], context: &str) { fuzz_target!(|data: &[u8]| { let mut u = Unstructured::new(data); - let Ok((entry_a, entry_b)) = - u.arbitrary::<(FuzzEntry, FuzzEntry)>() - else { - return; + // 1–4 entries to format through the same formatter instance. + let entry_count = match u.arbitrary::() { + Ok(n) => (n % 4) as usize + 1, + Err(_) => return, }; + let mut entries = Vec::with_capacity(entry_count); + for _ in 0..entry_count { + let Ok(entry) = u.arbitrary::() else { + return; + }; + entries.push(entry); + } - // Regular (non-sampled) path + // Regular (non-sampled) path — format all entries through the same formatter. + // We don't care if formatting returns a validation error, but it must never panic. let mut format = Json::new(); let mut output = Vec::new(); - - // Format the entry, we don't care if it returns a validation error, - // but it must never panic. 
- let result = format.format(&entry_a, &mut output); - - if let Ok(()) = result { - assert_valid_json_line(&output, "first call"); - } - - // Format a different entry through the same formatter to test state reuse. - output.clear(); - let result = format.format(&entry_b, &mut output); - if let Ok(()) = result { - assert_valid_json_line(&output, "state reuse call"); + for (i, entry) in entries.iter().enumerate() { + output.clear(); + let result = format.format(entry, &mut output); + if let Ok(()) = result { + assert_valid_json_line(&output, &format!("entry {i}")); + } } - // Sampled path - let Ok(rate_a) = arbitrary_sample_rate(&mut u) else { - return; - }; - let Ok(rate_b) = arbitrary_sample_rate(&mut u) else { - return; - }; - + // Sampled path, same entries, fresh formatter. let mut sampled = Json::new().with_sampling(); - output.clear(); - let result = sampled.format_with_sample_rate(&entry_a, &mut output, rate_a); - if let Ok(()) = result { - assert_valid_json_line(&output, "sampled first call"); - } - - output.clear(); - let result = sampled.format_with_sample_rate(&entry_b, &mut output, rate_b); - if let Ok(()) = result { - assert_valid_json_line(&output, "sampled state reuse call"); + for (i, entry) in entries.iter().enumerate() { + let Ok(rate) = arbitrary_sample_rate(&mut u) else { + return; + }; + output.clear(); + let result = sampled.format_with_sample_rate(entry, &mut output, rate); + if let Ok(()) = result { + assert_valid_json_line(&output, &format!("sampled entry {i}")); + } } }); From ac5cebd865aa474af0cb71b0a563ee22b24d9703 Mon Sep 17 00:00:00 2001 From: Julian Date: Tue, 17 Mar 2026 13:16:09 +0100 Subject: [PATCH 4/5] Simplify fuzz generators and CI --- .github/workflows/fuzz.yml | 49 +---- fuzz/README.md | 22 +- fuzz/fuzz_targets/fuzz_emf.rs | 115 +++------- fuzz/fuzz_targets/fuzz_entry.rs | 361 ++++++++------------------------ fuzz/fuzz_targets/fuzz_json.rs | 26 +-- 5 files changed, 129 insertions(+), 444 deletions(-) diff --git 
a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml index b2d7e99c..35163357 100644 --- a/.github/workflows/fuzz.yml +++ b/.github/workflows/fuzz.yml @@ -38,7 +38,7 @@ jobs: name: fuzz-artifacts-pr path: fuzz/artifacts/ - # Longer nightly run with corpus caching. + # Longer nightly run fuzz-nightly: name: Nightly Fuzz if: github.event_name != 'pull_request' @@ -48,63 +48,18 @@ jobs: steps: - uses: actions/checkout@v4 - uses: dtolnay/rust-toolchain@nightly - - name: Restore Rust build cache - uses: Swatinem/rust-cache@v2 + - uses: Swatinem/rust-cache@v2 with: shared-key: fuzz-nightly-${{ runner.os }} workspaces: | . -> target fuzz -> fuzz/target - - name: Compute corpus cache bucket - id: corpus_bucket - run: | - echo "day=$(date -u +%F)" >> "$GITHUB_OUTPUT" - echo "week=$(date -u +%G-W%V)" >> "$GITHUB_OUTPUT" - - name: Restore fuzz corpus cache - id: corpus_restore - uses: actions/cache/restore@v4 - with: - path: fuzz/corpus - # Policy: - # - one evolving cache lineage per branch per day - # - restore from same-day first, then same week, then branch, then OS-wide fallback - # - keep corpus in cache (not committed) to avoid repository bloat - key: fuzz-corpus-${{ runner.os }}-${{ github.ref_name }}-${{ steps.corpus_bucket.outputs.day }}-${{ github.run_id }} - restore-keys: | - fuzz-corpus-${{ runner.os }}-${{ github.ref_name }}-${{ steps.corpus_bucket.outputs.day }}- - fuzz-corpus-${{ runner.os }}-${{ github.ref_name }}-${{ steps.corpus_bucket.outputs.week }}- - fuzz-corpus-${{ runner.os }}-${{ github.ref_name }}- - fuzz-corpus-${{ runner.os }}- - - name: Ensure corpus directories exist - run: mkdir -p fuzz/corpus/fuzz_json fuzz/corpus/fuzz_emf - name: Install cargo-fuzz uses: dtolnay/install@cargo-fuzz - name: Run fuzz_json run: cargo +nightly fuzz run fuzz_json -- -max_total_time=300 - name: Run fuzz_emf run: cargo +nightly fuzz run fuzz_emf -- -max_total_time=300 - - name: Minimize corpus - if: always() - run: | - set -euo pipefail - for target in fuzz_json 
fuzz_emf; do - corpus_dir="fuzz/corpus/${target}" - if [ -d "$corpus_dir" ] && [ "$(ls -A "$corpus_dir" 2>/dev/null)" ]; then - echo "Minimizing corpus for ${target}..." - before=$(find "$corpus_dir" -type f | wc -l) - cargo +nightly fuzz cmin "$target" || true - after=$(find "$corpus_dir" -type f | wc -l) - echo "${target}: ${before} -> ${after} corpus entries" - else - echo "No corpus to minimize for ${target}" - fi - done - - name: Save fuzz corpus cache - if: always() - uses: actions/cache/save@v4 - with: - path: fuzz/corpus - key: ${{ steps.corpus_restore.outputs.cache-primary-key }} - name: Upload crash artifacts if: failure() uses: actions/upload-artifact@v4 diff --git a/fuzz/README.md b/fuzz/README.md index 1e4c030d..2e8c0c65 100644 --- a/fuzz/README.md +++ b/fuzz/README.md @@ -7,15 +7,15 @@ Uses [`cargo-fuzz`](https://github.com/rust-fuzz/cargo-fuzz) with libFuzzer to s - `fuzz_json`: If formatting returns `Ok(())`, output must be exactly one valid, newline-terminated JSON object. Tests both regular and sampled paths. - `fuzz_emf`: If formatting returns `Ok(())`, each emitted line must be a valid JSON object. Tests both regular and sampled paths with EMF-specific flag modes. -Both targets format two entries through the same formatter instance to exercise state reuse. Additional semantic invariants may be added in the future. +Both targets format multiple entries through the same formatter instance to exercise state reuse. ## Run Locally Requires Rust nightly and `cargo-fuzz` (`cargo install cargo-fuzz`). 
```bash -cargo +nightly fuzz run fuzz_json -- -max_total_time=60 -cargo +nightly fuzz run fuzz_emf -- -max_total_time=60 +cargo +nightly fuzz run fuzz_json -- -max_total_time=60 -print_coverage=1 +cargo +nightly fuzz run fuzz_emf -- -max_total_time=60 -print_coverage=1 ``` ## Reproduce a Crash @@ -24,12 +24,6 @@ cargo +nightly fuzz run fuzz_emf -- -max_total_time=60 cargo +nightly fuzz run fuzz_json fuzz/artifacts/fuzz_json/ ``` -Then fix the bug, add a deterministic regression test, and delete the reproducer. - -## Corpus - -`fuzz/corpus` is git-ignored. Do not commit the evolving corpus. - Minimize locally with: ```bash @@ -39,11 +33,5 @@ cargo +nightly fuzz cmin fuzz_emf ## CI -Nightly GitHub Actions workflow: - -1. Restores corpus from cache -2. Runs both targets (5 min each) -3. Minimizes corpus via `cargo fuzz cmin` -4. Saves corpus back to cache - -Corpus cache uses branch-scoped daily buckets with weekly/branch fallback, so coverage accumulates across runs without committing corpus files. +- **PRs**: 1-minute smoke test per target. +- **Nightly**: 5-minute run per target (schedule + workflow_dispatch). diff --git a/fuzz/fuzz_targets/fuzz_emf.rs b/fuzz/fuzz_targets/fuzz_emf.rs index b9d84ca2..190c49bb 100644 --- a/fuzz/fuzz_targets/fuzz_emf.rs +++ b/fuzz/fuzz_targets/fuzz_emf.rs @@ -10,7 +10,7 @@ mod fuzz_entry; -use arbitrary::Unstructured; +use arbitrary::{Arbitrary, Unstructured}; use libfuzzer_sys::fuzz_target; use metrique_writer_core::format::Format; @@ -18,12 +18,10 @@ use metrique_writer_core::sample::SampledFormat; use metrique_writer_core::{Entry, EntryWriter}; use metrique_writer_format_emf::{Emf, HighStorageResolution, NoMetric}; -use fuzz_entry::{ - FuzzEntry, FuzzField, FuzzMetricValue, arbitrary_sample_rate, arbitrary_string, -}; +use fuzz_entry::{FuzzEntry, FuzzField, FuzzMetricValue}; /// EMF-specific flag mode applied on top of base fuzz entries. 
-#[derive(Debug, Clone, Copy)] +#[derive(Debug, Clone, Copy, Arbitrary)] enum FuzzMetricFlagMode { None, HighStorageResolution, @@ -32,38 +30,24 @@ enum FuzzMetricFlagMode { NoMetricThenHigh, } -impl<'a> arbitrary::Arbitrary<'a> for FuzzMetricFlagMode { - fn arbitrary(u: &mut Unstructured<'a>) -> arbitrary::Result { - let tag: u8 = u.arbitrary()?; - Ok(match tag % 5 { - 0 => FuzzMetricFlagMode::None, - 1 => FuzzMetricFlagMode::HighStorageResolution, - 2 => FuzzMetricFlagMode::NoMetric, - 3 => FuzzMetricFlagMode::HighThenNoMetric, - _ => FuzzMetricFlagMode::NoMetricThenHigh, - }) - } -} - /// Wrapper around `FuzzEntry` that applies EMF-specific flag modes to metrics. -#[derive(Debug)] +#[derive(Debug, Arbitrary)] struct EmfFuzzEntry { inner: FuzzEntry, - /// One flag mode per metric field. Non-metric fields use index but ignore the flag. flag_modes: Vec, } impl Entry for EmfFuzzEntry { fn write<'a>(&'a self, writer: &mut impl EntryWriter<'a>) { // Delegate config and timestamps to the base entry's logic, - // but handle fields ourselves to apply EMF flags. + // but handle fields here to apply EMF-specific flag modes. 
if self.inner.allow_split_entries { writer.config(&const { metrique_writer_core::config::AllowSplitEntries::new() }); } - if let Some(entry_dimensions) = &self.inner.entry_dimensions { - writer.config(entry_dimensions); + if let Some(dims) = &self.inner.entry_dimensions { + writer.config(&dims.0); } - for timestamp in &self.inner.timestamps { + if let Some(timestamp) = &self.inner.timestamp { writer.timestamp(timestamp.to_system_time()); } for (i, field) in self.inner.fields.iter().enumerate() { @@ -74,7 +58,7 @@ impl Entry for EmfFuzzEntry { .unwrap_or(FuzzMetricFlagMode::None); match field { FuzzField::StringProperty { name, value } => { - writer.value(name.as_str(), &value.as_str()); + writer.value(name.0.as_str(), &value.as_str()); } FuzzField::Metric { name, @@ -85,25 +69,25 @@ impl Entry for EmfFuzzEntry { let metric = FuzzMetricValue { observations, dimensions, - unit: *unit, + unit: unit.0, }; match flag_mode { - FuzzMetricFlagMode::None => writer.value(name.as_str(), &metric), + FuzzMetricFlagMode::None => writer.value(name.0.as_str(), &metric), FuzzMetricFlagMode::HighStorageResolution => { - writer.value(name.as_str(), &HighStorageResolution::from(metric)); + writer.value(name.0.as_str(), &HighStorageResolution::from(metric)); } FuzzMetricFlagMode::NoMetric => { - writer.value(name.as_str(), &NoMetric::from(metric)); + writer.value(name.0.as_str(), &NoMetric::from(metric)); } FuzzMetricFlagMode::HighThenNoMetric => { writer.value( - name.as_str(), + name.0.as_str(), &NoMetric::from(HighStorageResolution::from(metric)), ); } FuzzMetricFlagMode::NoMetricThenHigh => { writer.value( - name.as_str(), + name.0.as_str(), &HighStorageResolution::from(NoMetric::from(metric)), ); } @@ -140,7 +124,7 @@ fn assert_valid_json_lines(output: &[u8], context: &str) { ); } -#[derive(Debug)] +#[derive(Debug, Arbitrary)] struct FuzzEmfConfig { namespace: String, default_dimensions: Vec>, @@ -149,44 +133,15 @@ struct FuzzEmfConfig { allow_ignored_dimensions: bool, } -impl<'a> 
arbitrary::Arbitrary<'a> for FuzzEmfConfig { - fn arbitrary(u: &mut Unstructured<'a>) -> arbitrary::Result { - let namespace = arbitrary_string(u, 48)?; - let extra_namespace = if u.arbitrary::()? { - Some(arbitrary_string(u, 48)?) - } else { - None - }; - let log_group_name = if u.arbitrary::()? { - Some(arbitrary_string(u, 64)?) - } else { - None - }; - - // Keep at least one default dimension set to match common EMF setup. - let set_count = (u.arbitrary::()? % 4) + 1; - let mut default_dimensions = Vec::with_capacity(set_count as usize); - for _ in 0..set_count { - let dim_count = u.arbitrary::()? % 5; - let mut dims = Vec::with_capacity(dim_count as usize); - for _ in 0..dim_count { - dims.push(arbitrary_string(u, 32)?); - } - default_dimensions.push(dims); - } - - Ok(Self { - namespace, - default_dimensions, - extra_namespace, - log_group_name, - allow_ignored_dimensions: u.arbitrary()?, - }) - } -} - fn build_emf(config: &FuzzEmfConfig) -> Emf { - let mut builder = Emf::builder(config.namespace.clone(), config.default_dimensions.clone()) + // Keep generation broad while normalizing invalid empty input + // into "publish without dimensions". + let default_dimensions = if config.default_dimensions.is_empty() { + vec![vec![]] + } else { + config.default_dimensions.clone() + }; + let mut builder = Emf::builder(config.namespace.clone(), default_dimensions) .allow_ignored_dimensions(config.allow_ignored_dimensions); if let Some(extra) = &config.extra_namespace { builder = builder.add_namespace(extra.clone()); @@ -202,23 +157,11 @@ fuzz_target!(|data: &[u8]| { let Ok(config) = u.arbitrary::() else { return; }; - // 1–4 entries to format through the same formatter instance. 
- let entry_count = match u.arbitrary::() { - Ok(n) => (n % 4) as usize + 1, - Err(_) => return, + let Ok(entries) = u.arbitrary::>() else { + return; }; - let mut entries = Vec::with_capacity(entry_count); - for _ in 0..entry_count { - let Ok(entry) = u.arbitrary::() else { - return; - }; - let flags: Vec = (0..entry.fields.len()) - .map(|_| u.arbitrary().unwrap_or(FuzzMetricFlagMode::None)) - .collect(); - entries.push(EmfFuzzEntry { - inner: entry, - flag_modes: flags, - }); + if entries.is_empty() { + return; } // Regular EMF path, format all entries through the same formatter. @@ -235,7 +178,7 @@ fuzz_target!(|data: &[u8]| { // Sampled EMF path, same entries, fresh formatter. let mut sampled = build_emf(&config).with_sampling(); for (i, entry) in entries.iter().enumerate() { - let Ok(rate) = arbitrary_sample_rate(&mut u) else { + let Ok(rate) = u.arbitrary::() else { return; }; output.clear(); diff --git a/fuzz/fuzz_targets/fuzz_entry.rs b/fuzz/fuzz_targets/fuzz_entry.rs index 4ce92b60..6a1ae332 100644 --- a/fuzz/fuzz_targets/fuzz_entry.rs +++ b/fuzz/fuzz_targets/fuzz_entry.rs @@ -11,29 +11,72 @@ use metrique_writer_core::{ Entry, EntryWriter, MetricFlags, Observation, Unit, ValueWriter, }; -// Configuration values used to control the frequency of different types of fields and observations. -/// Percentage of entries to omit entry dimensions. -const OMIT_ENTRY_DIMENSIONS_PERCENT: u8 = 55; -/// Percentage of fields to reuse existing name. -const REUSE_EXISTING_NAME_PERCENT: u8 = 70; -/// Percentage of fields to replace existing name in the pool. -const REPLACE_EXISTING_POOL_NAME_PERCENT: u8 = 20; +const EMPTY_FIELD_NAME_RATE_PERCENT: u8 = 5; -/// A single field in our fuzzed entry. +/// Field-name string for fuzzing. +/// +/// Most generated names are forced non-empty to avoid spending too much time in +/// expected validation failures, but we keep a small empty-name probability to +/// still exercise that error path. 
#[derive(Debug)] +pub struct FuzzFieldName(pub String); + +impl<'a> Arbitrary<'a> for FuzzFieldName { + fn arbitrary(u: &mut Unstructured<'a>) -> arbitrary::Result { + let mut s: String = u.arbitrary()?; + if s.is_empty() { + // Keep empty names rarely (~5%) for validation-path coverage. + if u.int_in_range(0..=99)? < EMPTY_FIELD_NAME_RATE_PERCENT { + return Ok(Self(s)); + } + + s = u.arbitrary::().unwrap_or_default(); + if s.is_empty() { + s.push('x'); + } + } + Ok(Self(s)) + } +} + +/// Wrapper for `Unit` (foreign `#[non_exhaustive]` type). +#[derive(Debug, Clone, Copy)] +pub struct FuzzUnit(pub Unit); + +impl<'a> Arbitrary<'a> for FuzzUnit { + fn arbitrary(u: &mut Unstructured<'a>) -> arbitrary::Result { + let tag: u8 = u.arbitrary()?; + Ok(Self(match tag % 11 { + 0 => Unit::None, + 1 => Unit::Count, + 2 => Unit::Percent, + 3 => Unit::Second(NegativeScale::Micro), + 4 => Unit::Second(NegativeScale::Milli), + 5 => Unit::Second(NegativeScale::One), + 6 => Unit::Byte(PositiveScale::One), + 7 => Unit::Byte(PositiveScale::Kilo), + 8 => Unit::Byte(PositiveScale::Mega), + 9 => Unit::Bit(PositiveScale::One), + _ => Unit::Bit(PositiveScale::Kilo), + })) + } +} + +/// A single field in our fuzzed entry. 
+#[derive(Debug, Arbitrary)] pub enum FuzzField { /// A string property like `writer.value("key", &"some string")` - StringProperty { name: String, value: String }, + StringProperty { name: FuzzFieldName, value: String }, /// A metric with one or more observations Metric { - name: String, + name: FuzzFieldName, observations: Vec, dimensions: Vec<(String, String)>, - unit: Unit, + unit: FuzzUnit, }, } -#[derive(Debug)] +#[derive(Debug, Arbitrary)] pub enum FuzzObservation { Unsigned(u64), Floating(f64), @@ -52,42 +95,6 @@ impl FuzzObservation { } } -impl<'a> Arbitrary<'a> for FuzzObservation { - fn arbitrary(u: &mut Unstructured<'a>) -> arbitrary::Result { - let tag: u8 = u.arbitrary()?; - match tag % 4 { - 0 => Ok(FuzzObservation::Unsigned(u.arbitrary()?)), - 1 => Ok(FuzzObservation::Floating(arbitrary_f64(u)?)), - _ => Ok(FuzzObservation::Repeated { - total: arbitrary_f64(u)?, - // Keep this edge case frequent: repeated with 0 count. - occurrences: if u.arbitrary::()? { - 0 - } else { - u.arbitrary()? - }, - }), - } - } -} - -pub fn arbitrary_unit<'a>(u: &mut Unstructured<'a>) -> arbitrary::Result { - let tag: u8 = u.arbitrary()?; - Ok(match tag % 11 { - 0 => Unit::None, - 1 => Unit::Count, - 2 => Unit::Percent, - 3 => Unit::Second(NegativeScale::Micro), - 4 => Unit::Second(NegativeScale::Milli), - 5 => Unit::Second(NegativeScale::One), - 6 => Unit::Byte(PositiveScale::One), - 7 => Unit::Byte(PositiveScale::Kilo), - 8 => Unit::Byte(PositiveScale::Mega), - 9 => Unit::Bit(PositiveScale::One), - _ => Unit::Bit(PositiveScale::Kilo), - }) -} - #[derive(Debug, Arbitrary)] pub struct FuzzTimestamp { pub before_epoch: bool, @@ -96,29 +103,45 @@ pub struct FuzzTimestamp { impl FuzzTimestamp { pub fn to_system_time(&self) -> SystemTime { - // Keep values bounded to avoid pathological durations. 
- let secs = self.secs % (365 * 500 * 24 * 3600); - let duration = Duration::from_secs(secs); + let duration = Duration::from_secs(self.secs); if self.before_epoch { SystemTime::UNIX_EPOCH .checked_sub(duration) .unwrap_or(SystemTime::UNIX_EPOCH) } else { - SystemTime::UNIX_EPOCH + duration + SystemTime::UNIX_EPOCH + .checked_add(duration) + .unwrap_or(SystemTime::UNIX_EPOCH) } } } +/// Wrapper for `EntryDimensions` (foreign type that can't derive `Arbitrary`). +/// Stores `EntryDimensions` directly because `writer.config()` borrows it for `'a`. +#[derive(Debug)] +pub struct FuzzEntryDimensions(pub EntryDimensions); + +impl<'a> Arbitrary<'a> for FuzzEntryDimensions { + fn arbitrary(u: &mut Unstructured<'a>) -> arbitrary::Result { + let sets: Vec> = u.arbitrary()?; + let sets: Vec]>> = sets + .into_iter() + .map(|dims| Cow::Owned(dims.into_iter().map(Cow::Owned).collect())) + .collect(); + Ok(Self(EntryDimensions::new(Cow::Owned(sets)))) + } +} + /// Fuzzed entry that exercises the full `EntryWriter` interface. /// /// This is a format-agnostic entry: it writes metrics directly without /// format-specific wrappers (like EMF flags). Format-specific fuzz targets /// can wrap this to add their own behavior. 
-#[derive(Debug)] +#[derive(Debug, Arbitrary)] pub struct FuzzEntry { - pub timestamps: Vec, + pub timestamp: Option, pub allow_split_entries: bool, - pub entry_dimensions: Option, + pub entry_dimensions: Option, pub fields: Vec, } @@ -127,16 +150,16 @@ impl Entry for FuzzEntry { if self.allow_split_entries { writer.config(&const { AllowSplitEntries::new() }); } - if let Some(entry_dimensions) = &self.entry_dimensions { - writer.config(entry_dimensions); + if let Some(dims) = &self.entry_dimensions { + writer.config(&dims.0); } - for timestamp in &self.timestamps { + if let Some(timestamp) = &self.timestamp { writer.timestamp(timestamp.to_system_time()); } for field in &self.fields { match field { FuzzField::StringProperty { name, value } => { - writer.value(name.as_str(), &value.as_str()); + writer.value(name.0.as_str(), &value.as_str()); } FuzzField::Metric { name, @@ -147,9 +170,9 @@ impl Entry for FuzzEntry { let metric = FuzzMetricValue { observations, dimensions, - unit: *unit, + unit: unit.0, }; - writer.value(name.as_str(), &metric); + writer.value(name.0.as_str(), &metric); } } } @@ -176,219 +199,3 @@ impl metrique_writer_core::value::Value for FuzzMetricValue<'_> { ); } } - -impl<'a> Arbitrary<'a> for FuzzEntry { - fn arbitrary(u: &mut Unstructured<'a>) -> arbitrary::Result { - let timestamp_count = match u.arbitrary::()? % 8 { - 0 => 0, - 1 | 2 | 3 | 4 => 1, - 5 => 2, - 6 => 3, - _ => 4, - }; - let timestamps: Vec = (0..timestamp_count) - .map(|_| u.arbitrary()) - .collect::>()?; - let allow_split_entries: bool = u.arbitrary()?; - - let entry_dimensions = if chance_percent(u, OMIT_ENTRY_DIMENSIONS_PERCENT)? { - None - } else { - Some(arbitrary_entry_dimensions(u)?) - }; - - // 0-24 fields per entry, with occasional larger cases. - let field_count = match u.arbitrary::()? 
% 8 { - 0 => 0, - 1 => 1, - 2 => 2, - 3 => 4, - 4 => 8, - 5 => 12, - 6 => 16, - _ => 24, - }; - - let mut name_pool = Vec::new(); - let fields: Vec = (0..field_count) - .map(|_| arbitrary_field(u, &mut name_pool)) - .collect::>()?; - - Ok(FuzzEntry { - timestamps, - allow_split_entries, - entry_dimensions, - fields, - }) - } -} - -fn arbitrary_field<'a>( - u: &mut Unstructured<'a>, - name_pool: &mut Vec, -) -> arbitrary::Result { - let is_string: bool = u.arbitrary()?; - let reuse_name = !name_pool.is_empty() && chance_percent(u, REUSE_EXISTING_NAME_PERCENT)?; - let name = if reuse_name { - let idx = choose_index(u, name_pool.len())?; - name_pool[idx].clone() - } else { - let name = arbitrary_string(u, 96)?; - if !name_pool.is_empty() && chance_percent(u, REPLACE_EXISTING_POOL_NAME_PERCENT)? { - // Keep pool bounded and still churn names. - let idx = choose_index(u, name_pool.len())?; - name_pool[idx] = name.clone(); - } else if name_pool.len() < 16 { - name_pool.push(name.clone()); - } - name - }; - - if is_string { - let value = arbitrary_string(u, 192)?; - Ok(FuzzField::StringProperty { name, value }) - } else { - let obs_count = match u.arbitrary::()? % 8 { - 0 => 0, - 1 => 1, - 2 => 2, - 3 => 3, - 4 => 4, - 5 => 8, - 6 => 16, - _ => 24, - }; - let observations: Vec = (0..obs_count) - .map(|_| u.arbitrary()) - .collect::>()?; - - let dim_count = match u.arbitrary::()? % 6 { - 0 => 0, - 1 => 1, - 2 => 2, - 3 => 3, - 4 => 6, - _ => 12, - }; - let dimensions: Vec<(String, String)> = (0..dim_count) - .map(|_| Ok((arbitrary_string(u, 48)?, arbitrary_string(u, 64)?))) - .collect::>()?; - - Ok(FuzzField::Metric { - name, - observations, - dimensions, - unit: arbitrary_unit(u)?, - }) - } -} - -pub fn arbitrary_entry_dimensions<'a>( - u: &mut Unstructured<'a>, -) -> arbitrary::Result { - let set_count = match u.arbitrary::()? 
% 6 { - 0 => 0, - 1 => 1, - 2 => 2, - 3 => 3, - 4 => 4, - _ => 6, - }; - let mut sets: Vec]>> = Vec::with_capacity(set_count); - for _ in 0..set_count { - let dim_count = match u.arbitrary::()? % 6 { - 0 => 0, - 1 => 1, - 2 => 2, - 3 => 3, - 4 => 4, - _ => 6, - }; - let mut dims: Vec> = Vec::with_capacity(dim_count); - for _ in 0..dim_count { - dims.push(Cow::Owned(arbitrary_string(u, 48)?)); - } - sets.push(Cow::Owned(dims)); - } - Ok(EntryDimensions::new(Cow::Owned(sets))) -} - -pub fn arbitrary_string<'a>(u: &mut Unstructured<'a>, max_len: usize) -> arbitrary::Result { - let len = (u.arbitrary::()? as usize).min(max_len); - let mut s = String::with_capacity(len); - for _ in 0..len { - s.push(arbitrary_char(u)?); - } - Ok(s) -} - -pub fn arbitrary_char<'a>(u: &mut Unstructured<'a>) -> arbitrary::Result { - const JSON_ESCAPES: [char; 6] = ['"', '\\', '\n', '\r', '\t', '\u{08}']; - const DELIMS: [char; 6] = ['{', '}', '[', ']', ':', ',']; - let bucket: u8 = u.arbitrary()?; - match bucket % 10 { - 0 => { - let idx = (u.arbitrary::()? as usize) % JSON_ESCAPES.len(); - Ok(JSON_ESCAPES[idx]) - } - 1 => { - let control = u.int_in_range(0..=0x1f)?; - Ok(char::from(control)) - } - 2 => { - let idx = (u.arbitrary::()? as usize) % DELIMS.len(); - Ok(DELIMS[idx]) - } - 3 => Ok(if u.arbitrary::()? 
{ ' ' } else { '\n' }), - _ => u.arbitrary::(), - } -} - -pub fn arbitrary_f64<'a>(u: &mut Unstructured<'a>) -> arbitrary::Result { - let choice: u8 = u.arbitrary()?; - Ok(match choice % 12 { - 0 => f64::NAN, - 1 => f64::INFINITY, - 2 => f64::NEG_INFINITY, - 3 => -0.0, - 4 => 0.0, - 5 => f64::MAX, - 6 => f64::MIN, - 7 => f64::MIN_POSITIVE, - 8 => f64::from_bits(1), - _ => f64::from_bits(u.arbitrary()?), - }) -} - -pub fn chance_percent<'a>(u: &mut Unstructured<'a>, percent: u8) -> arbitrary::Result { - debug_assert!(percent <= 100); - if percent == 0 { - return Ok(false); - } - if percent == 100 { - return Ok(true); - } - let roll: u8 = u.int_in_range(0..=99)?; - Ok(roll < percent) -} - -pub fn choose_index<'a>(u: &mut Unstructured<'a>, len: usize) -> arbitrary::Result { - debug_assert!(len > 0); - u.int_in_range(0..=len - 1) -} - -/// Generate a fuzzed sample rate with bias toward edge cases. -pub fn arbitrary_sample_rate<'a>(u: &mut Unstructured<'a>) -> arbitrary::Result { - let selector: u8 = u.arbitrary()?; - Ok(match selector % 10 { - 0 => f32::NAN, - 1 => 0.0, - 2 => -1.0, - 3 => f32::INFINITY, - 4 => 1.0, - 5 => 0.5, - 6 => 0.001, - 7 => 1e-30, - _ => f32::from_bits(u.arbitrary()?), - }) -} diff --git a/fuzz/fuzz_targets/fuzz_json.rs b/fuzz/fuzz_targets/fuzz_json.rs index bc74b49b..8012af0b 100644 --- a/fuzz/fuzz_targets/fuzz_json.rs +++ b/fuzz/fuzz_targets/fuzz_json.rs @@ -9,14 +9,13 @@ mod fuzz_entry; -use arbitrary::Unstructured; use libfuzzer_sys::fuzz_target; use metrique_writer_core::format::Format; use metrique_writer_core::sample::SampledFormat; use metrique_writer_format_json::Json; -use fuzz_entry::{FuzzEntry, arbitrary_sample_rate}; +use fuzz_entry::FuzzEntry; /// Assert that output is exactly one newline-terminated JSON object. /// @@ -28,7 +27,7 @@ fn assert_valid_json_line(output: &[u8], context: &str) { String::from_utf8_lossy(output), ); - // Strip the trailing newline; the remainder must contain no newlines. 
+ // Strip the trailing newline, the remainder must contain no newlines. let body = &output[..output.len() - 1]; assert!( !body.contains(&b'\n'), @@ -50,22 +49,15 @@ fn assert_valid_json_line(output: &[u8], context: &str) { } fuzz_target!(|data: &[u8]| { - let mut u = Unstructured::new(data); - // 1–4 entries to format through the same formatter instance. - let entry_count = match u.arbitrary::() { - Ok(n) => (n % 4) as usize + 1, - Err(_) => return, + let mut u = arbitrary::Unstructured::new(data); + let Ok(entries) = u.arbitrary::>() else { + return; }; - let mut entries = Vec::with_capacity(entry_count); - for _ in 0..entry_count { - let Ok(entry) = u.arbitrary::() else { - return; - }; - entries.push(entry); + if entries.is_empty() { + return; } - // Regular (non-sampled) path — format all entries through the same formatter. - // We don't care if formatting returns a validation error, but it must never panic. + // Regular (non-sampled) path, all entries through the same formatter. let mut format = Json::new(); let mut output = Vec::new(); for (i, entry) in entries.iter().enumerate() { @@ -79,7 +71,7 @@ fuzz_target!(|data: &[u8]| { // Sampled path, same entries, fresh formatter. 
let mut sampled = Json::new().with_sampling(); for (i, entry) in entries.iter().enumerate() { - let Ok(rate) = arbitrary_sample_rate(&mut u) else { + let Ok(rate) = u.arbitrary::() else { return; }; output.clear(); From 4bfd69b56d47ada7371de0ee6f7ffa25364c2164 Mon Sep 17 00:00:00 2001 From: Julian Date: Tue, 17 Mar 2026 13:40:01 +0100 Subject: [PATCH 5/5] Split pr/nightly fuzz workflows --- .github/workflows/fuzz-nightly.yml | 36 ++++++++++++++++++++++++++++ .github/workflows/fuzz.yml | 38 ------------------------------ 2 files changed, 36 insertions(+), 38 deletions(-) create mode 100644 .github/workflows/fuzz-nightly.yml diff --git a/.github/workflows/fuzz-nightly.yml b/.github/workflows/fuzz-nightly.yml new file mode 100644 index 00000000..c268a0a5 --- /dev/null +++ b/.github/workflows/fuzz-nightly.yml @@ -0,0 +1,36 @@ +name: Nightly Fuzz +on: + workflow_dispatch: + schedule: + - cron: "0 6 * * *" # Every day at 06:00 UTC + +permissions: + contents: read + +jobs: + fuzz-nightly: + name: Nightly Fuzz + runs-on: ubuntu-latest + env: + RUST_BACKTRACE: 1 + steps: + - uses: actions/checkout@v4 + - uses: dtolnay/rust-toolchain@nightly + - uses: Swatinem/rust-cache@v2 + with: + shared-key: fuzz-nightly-${{ runner.os }} + workspaces: | + . 
-> target + fuzz -> fuzz/target + - name: Install cargo-fuzz + uses: dtolnay/install@cargo-fuzz + - name: Run fuzz_json + run: cargo +nightly fuzz run fuzz_json -- -max_total_time=300 + - name: Run fuzz_emf + run: cargo +nightly fuzz run fuzz_emf -- -max_total_time=300 + - name: Upload crash artifacts + if: failure() + uses: actions/upload-artifact@v4 + with: + name: fuzz-artifacts + path: fuzz/artifacts/ diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml index 35163357..e03fdd04 100644 --- a/.github/workflows/fuzz.yml +++ b/.github/workflows/fuzz.yml @@ -1,9 +1,6 @@ name: Fuzz on: pull_request: - workflow_dispatch: - schedule: - - cron: "0 6 * * *" # Every day at 06:00 UTC permissions: contents: read @@ -31,38 +28,3 @@ jobs: run: cargo +nightly fuzz run fuzz_json -- -max_total_time=60 - name: Run fuzz_emf run: cargo +nightly fuzz run fuzz_emf -- -max_total_time=60 - - name: Upload crash artifacts - if: failure() - uses: actions/upload-artifact@v4 - with: - name: fuzz-artifacts-pr - path: fuzz/artifacts/ - - # Longer nightly run - fuzz-nightly: - name: Nightly Fuzz - if: github.event_name != 'pull_request' - runs-on: ubuntu-latest - env: - RUST_BACKTRACE: 1 - steps: - - uses: actions/checkout@v4 - - uses: dtolnay/rust-toolchain@nightly - - uses: Swatinem/rust-cache@v2 - with: - shared-key: fuzz-nightly-${{ runner.os }} - workspaces: | - . -> target - fuzz -> fuzz/target - - name: Install cargo-fuzz - uses: dtolnay/install@cargo-fuzz - - name: Run fuzz_json - run: cargo +nightly fuzz run fuzz_json -- -max_total_time=300 - - name: Run fuzz_emf - run: cargo +nightly fuzz run fuzz_emf -- -max_total_time=300 - - name: Upload crash artifacts - if: failure() - uses: actions/upload-artifact@v4 - with: - name: fuzz-artifacts - path: fuzz/artifacts/