diff --git a/MODULE.bazel b/MODULE.bazel index 7b9106086414d..230c111ad2e77 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -482,3 +482,20 @@ http_archive( strip_prefix = "antithesis-sdk-cpp-0.4.7", urls = ["https://github.com/antithesishq/antithesis-sdk-cpp/archive/refs/tags/0.4.7.tar.gz"], ) + +# JSON parser conformance corpus used by +# //src/v/serde/json/tests:json_test_suite_test. MIT-licensed; see +# https://github.com/nst/JSONTestSuite. +http_archive( + name = "nst_json_test_suite", + build_file_content = """ +filegroup( + name = "test_parsing", + srcs = glob(["test_parsing/*.json"]), + visibility = ["//visibility:public"], +) +""", + integrity = "sha256-Mqsvs3zCZnnb2RZNu3BTvO8QE0ZD1TywzvRd52gNzcw=", + strip_prefix = "JSONTestSuite-1ef36fa01286573e846ac449e8683f8833c5b26a", + urls = ["https://github.com/nst/JSONTestSuite/archive/1ef36fa01286573e846ac449e8683f8833c5b26a.tar.gz"], +) diff --git a/src/v/serde/json/tests/BUILD b/src/v/serde/json/tests/BUILD index 246150f78bd58..16158898195a2 100644 --- a/src/v/serde/json/tests/BUILD +++ b/src/v/serde/json/tests/BUILD @@ -67,6 +67,17 @@ redpanda_cc_gtest( ], ) +redpanda_test_cc_library( + name = "test_cases", + hdrs = [ + "test_cases.h", + ], + visibility = ["//visibility:private"], + deps = [ + "//src/v/base", + ], +) + redpanda_cc_gtest( name = "json_checker_test", timeout = "short", @@ -79,6 +90,28 @@ redpanda_cc_gtest( "testdata/jsonchecker/*", ]), deps = [ + ":test_cases", + "//src/v/bytes:iobuf", + "//src/v/serde/json:parser", + "//src/v/test_utils:gtest", + "//src/v/test_utils:runfiles", + "//src/v/utils:file_io", + "@googletest//:gtest", + "@googletest//:gtest_main", + "@seastar", + ], +) + +redpanda_cc_gtest( + name = "json_test_suite_test", + timeout = "short", + srcs = [ + "json_test_suite_test.cc", + ], + cpu = 1, + data = ["@nst_json_test_suite//:test_parsing"], + deps = [ + ":test_cases", "//src/v/bytes:iobuf", "//src/v/serde/json:parser", "//src/v/test_utils:gtest", diff --git a/src/v/serde/json/tests/json_checker_test.cc b/src/v/serde/json/tests/json_checker_test.cc index 565a5c95f7097..d1b668b630651 100644 --- a/src/v/serde/json/tests/json_checker_test.cc +++ b/src/v/serde/json/tests/json_checker_test.cc @@ -10,27 +10,28 @@ */ #include "serde/json/parser.h" +#include "serde/json/tests/test_cases.h" #include "test_utils/runfiles.h" #include "test_utils/test.h" #include "utils/file_io.h" -#include +#include using namespace serde::json; -std::vector collect_test_cases(const std::string& directory); +class json_checker_test + : public seastar_test + , public ::testing::WithParamInterface {}; -ss::future<> run_json_test( - const std::string& directory, - const std::string& test_case, - std::optional config = std::nullopt) { - auto test_case_path = test_utils::get_runfile_path( - fmt::format( - "src/v/serde/json/tests/testdata/{}/{}", directory, test_case)); +TEST_P_CORO(json_checker_test, all) { + const auto& test_case_path = GetParam(); + auto filename = std::filesystem::path(test_case_path).filename().string(); auto contents = co_await read_fully(test_case_path); + // Use depth limit of 19 to properly fail fail18.json (20 levels) while + // allowing pass2.json (19 levels, "Not too deep") to succeed auto parser = serde::json::parser( - std::move(contents), config.value_or(parser_config{})); + std::move(contents), parser_config{.max_depth = 19}); while (co_await parser.next()) { // Do nothing, just drain the parser. @@ -40,71 +41,24 @@ ss::future<> run_json_test( } // The file name indicates whether parsing should succeed. - bool expected_pass = test_case.starts_with("pass"); + bool expected_pass = filename.starts_with("pass"); auto current_token = parser.token(); if (expected_pass) { - EXPECT_NE(current_token, token::error) << "Expected to pass but failed"; + EXPECT_NE(current_token, token::error) + << filename << ": expected to pass but failed"; } else { - EXPECT_EQ(current_token, token::error) << "Expected to fail but passed"; + EXPECT_EQ(current_token, token::error) + << filename << ": expected to fail but passed"; } } -class json_checker_test - : public seastar_test - , public testing::WithParamInterface {}; - -TEST_P_CORO(json_checker_test, all) { - auto test_case = GetParam(); - // Use depth limit of 19 to properly fail fail18.json (20 levels) while - // allowing pass2.json (19 levels, "Not too deep") to succeed - auto config = parser_config{.max_depth = 19}; - co_await run_json_test("jsonchecker", test_case, config); -} - INSTANTIATE_TEST_SUITE_P( json_checker_tests, json_checker_test, - testing::ValuesIn(collect_test_cases("jsonchecker"))); - -std::vector collect_test_cases(const std::string& directory) { - std::vector test_cases; - - auto test_case_path = test_utils::get_runfile_path( - fmt::format("src/v/serde/json/tests/testdata/{}", directory)); - - std::filesystem::path dir_path(test_case_path); - vassert( - std::filesystem::exists(dir_path), - "Directory does not exist: {}", - dir_path.string()); - vassert( - std::filesystem::is_directory(dir_path), - "Path is not a directory: {}", - dir_path.string()); - - for (const auto& entry : std::filesystem::directory_iterator(dir_path)) { - if (entry.is_regular_file() || entry.is_symlink()) { - std::string filename = entry.path().filename().string(); - if (filename.ends_with(".json")) { - test_cases.push_back(filename); - } - } else { - vassert( - false, - "Expecting only files but {} is not a file", - entry.path().filename().string()); - } - } - - vassert( - !test_cases.empty(), - "No test cases found in directory {}", - dir_path.string()); - - std::sort( - test_cases.begin(), - test_cases.end(), - [](const std::string& a, const std::string& b) { return a < b; }); - - return test_cases; -} + ::testing::ValuesIn( + serde::json::testing::collect_json_test_cases( + test_utils::get_runfile_path( + "src/v/serde/json/tests/testdata/jsonchecker"))), + [](const ::testing::TestParamInfo& info) { + return std::filesystem::path(info.param).stem().string(); + }); diff --git a/src/v/serde/json/tests/json_test_suite_test.cc b/src/v/serde/json/tests/json_test_suite_test.cc new file mode 100644 index 0000000000000..79cba530f5a76 --- /dev/null +++ b/src/v/serde/json/tests/json_test_suite_test.cc @@ -0,0 +1,88 @@ +/* + * Copyright 2026 Redpanda Data, Inc. + * + * Use of this software is governed by the Business Source License + * included in the file licenses/BSL.md + * + * As of the Change Date specified in that file, in accordance with + * the Business Source License, use of this software will be governed + * by the Apache License, Version 2.0 + */ + +#include "serde/json/parser.h" +#include "serde/json/tests/test_cases.h" +#include "test_utils/runfiles.h" +#include "test_utils/test.h" +#include "utils/file_io.h" + +#include + +#include +#include + +using namespace serde::json; + +/// Test suite from https://github.com/nst/JSONTestSuite. +/// +/// File-name prefixes encode the expected parser behavior per RFC 8259: +/// - y_ must be accepted +/// - n_ must be rejected +/// - i_ parsers are free to accept or reject +class json_test_suite_test + : public seastar_test + , public ::testing::WithParamInterface {}; + +TEST_P_CORO(json_test_suite_test, test_parsing) { + const auto& test_case_path = GetParam(); + auto filename = std::filesystem::path(test_case_path).filename().string(); + + auto contents = co_await read_fully(test_case_path); + auto parser = serde::json::parser(std::move(contents), parser_config{}); + + while (co_await parser.next()) { + } + + auto final_token = parser.token(); + ASSERT_TRUE_CORO(final_token == token::eof || final_token == token::error) + << "parser::next() returned false but final token is neither eof nor " + "error: " + << final_token; + bool accepted = final_token == token::eof; + + if (filename.starts_with("y_")) { + EXPECT_TRUE(accepted) << filename << ": expected accept, got reject"; + } else if (filename.starts_with("n_")) { + EXPECT_FALSE(accepted) << filename << ": expected reject, got accept"; + } else { + vassert( + filename.starts_with("i_"), + "Unexpected test case name prefix: {}", + filename); + // Implementation-defined: either outcome is acceptable. The parser + // must not crash. + } +} + +INSTANTIATE_TEST_SUITE_P( + json_test_suite, + json_test_suite_test, + ::testing::ValuesIn( + serde::json::testing::collect_json_test_cases( + test_utils::get_runfile_path("test_parsing", "nst_json_test_suite"))), + [](const ::testing::TestParamInfo& info) { + // GTest requires parameter names to match [a-zA-Z0-9_] and to be + // unique. Use the filename stem and hex-escape any non-alphanumeric + // chars (using `_XX`) to preserve uniqueness across similar names. + auto stem = std::filesystem::path(info.param).stem().string(); + std::string out; + out.reserve(stem.size()); + for (auto c : stem) { + auto uc = static_cast(c); + if (std::isalnum(uc) || uc == '_') { + out.push_back(c); + } else { + fmt::format_to(std::back_inserter(out), "_{:02x}", uc); + } + } + return out; + }); diff --git a/src/v/serde/json/tests/test_cases.h b/src/v/serde/json/tests/test_cases.h new file mode 100644 index 0000000000000..aebce3edb4943 --- /dev/null +++ b/src/v/serde/json/tests/test_cases.h @@ -0,0 +1,60 @@ +/* + * Copyright 2026 Redpanda Data, Inc. + * + * Use of this software is governed by the Business Source License + * included in the file licenses/BSL.md + * + * As of the Change Date specified in that file, in accordance with + * the Business Source License, use of this software will be governed + * by the Apache License, Version 2.0 + */ + +#pragma once + +#include "base/vassert.h" + +#include +#include +#include +#include + +namespace serde::json::testing { + +/// Returns the sorted list of full paths to `.json` files found directly +/// under `dir`. The directory is required to exist and to contain at +/// least one `.json` file — this helper is intended to back +/// INSTANTIATE_TEST_SUITE_P calls where an empty list would silently +/// produce zero test cases. Full paths (rather than bare filenames) are +/// returned so callers don't have to carry the directory through to the +/// test body. +inline std::vector +collect_json_test_cases(const std::filesystem::path& dir) { + vassert( + std::filesystem::exists(dir), + "Directory does not exist: {}", + dir.string()); + vassert( + std::filesystem::is_directory(dir), + "Path is not a directory: {}", + dir.string()); + + std::vector test_cases; + for (const auto& entry : std::filesystem::directory_iterator(dir)) { + vassert( + entry.is_regular_file() || entry.is_symlink(), + "Expected only files under {}, found: {}", + dir.string(), + entry.path().string()); + if (entry.path().extension() == ".json") { + test_cases.push_back(entry.path().string()); + } + } + + vassert( + !test_cases.empty(), "No test cases found in directory {}", dir.string()); + + std::ranges::sort(test_cases); + return test_cases; +} + +} // namespace serde::json::testing diff --git a/src/v/test_utils/runfiles.cc b/src/v/test_utils/runfiles.cc index 7e1201ffe16d3..80238537c9976 100644 --- a/src/v/test_utils/runfiles.cc +++ b/src/v/test_utils/runfiles.cc @@ -10,7 +10,7 @@ namespace test_utils { -std::string get_runfile_path(std::string_view path) { +std::string get_runfile_path(std::string_view path, std::string_view repo) { using bazel::tools::cpp::runfiles::Runfiles; std::string error; std::unique_ptr runfiles; @@ -25,7 +25,7 @@ std::string get_runfile_path(std::string_view path) { if (runfiles == nullptr) { throw std::runtime_error(error); } - return runfiles->Rlocation(fmt::format("_main/{}", path)); + return runfiles->Rlocation(fmt::format("{}/{}", repo, path)); } } // namespace test_utils diff --git a/src/v/test_utils/runfiles.h b/src/v/test_utils/runfiles.h index 8c1849cb174c0..cba63193fbf4c 100644 --- a/src/v/test_utils/runfiles.h +++ b/src/v/test_utils/runfiles.h @@ -6,12 +6,19 @@ namespace test_utils { /* - * Usage: - * - Add a `data = [path/to/some.file]` to the test target - * - Call get_runfile_path with "root/.../path/to/some.file" + * Resolve `path` to an absolute filesystem path at test runtime. * - * The return value is the path to the file. + * `path` is a repo-relative path (e.g. "src/v/.../testdata/foo.json" for + * files from this repository). The file must be listed in the test + * target's `data = [...]` attribute so Bazel stages it into the + * runfiles tree. + * + * `repo` selects which Bazel repository the path is rooted in. It + * defaults to "_main" (this repository). For files exposed by an + * external repository — e.g. a `http_archive` declared in MODULE.bazel — + * pass that repository's name. */ -std::string get_runfile_path(std::string_view); +std::string +get_runfile_path(std::string_view path, std::string_view repo = "_main"); } // namespace test_utils