diff --git a/cpp/src/arrow/integration/json_integration_test.cc b/cpp/src/arrow/integration/json_integration_test.cc index 0e84ea6124d5..23105e8ca172 100644 --- a/cpp/src/arrow/integration/json_integration_test.cc +++ b/cpp/src/arrow/integration/json_integration_test.cc @@ -1112,6 +1112,51 @@ TEST(TestJsonFileReadWrite, JsonExample6) { AssertArraysEqual(*batch->column(0), *expected_array); } +static void AssertInvalidBinaryViewJson(const std::string& json_array) { + rj::Document d; + // Pass explicit size to avoid ASAN issues with SIMD loads in RapidJson. + d.Parse(json_array.data(), json_array.size()); + ASSERT_FALSE(d.HasParseError()); + + ASSERT_RAISES(Invalid, + json::ReadArray(default_memory_pool(), d, field("f", binary_view()))); +} + +TEST(TestJsonFileReadWrite, BinaryViewRejectsNegativeSize) { + AssertInvalidBinaryViewJson(R"({ + "name": "f", + "count": 1, + "VALIDITY": [1], + "VIEWS": [{"SIZE": -1, "INLINED": ""}], + "VARIADIC_DATA_BUFFERS": [] + })"); +} + +TEST(TestJsonFileReadWrite, BinaryViewRejectsInlineLengthMismatch) { + AssertInvalidBinaryViewJson(R"({ + "name": "f", + "count": 1, + "VALIDITY": [1], + "VIEWS": [{"SIZE": 4, "INLINED": "x"}], + "VARIADIC_DATA_BUFFERS": [] + })"); +} + +TEST(TestJsonFileReadWrite, BinaryViewRejectsOutOfBoundsReference) { + AssertInvalidBinaryViewJson(R"({ + "name": "f", + "count": 1, + "VALIDITY": [1], + "VIEWS": [{ + "SIZE": 20, + "PREFIX_HEX": "00010203", + "BUFFER_INDEX": 0, + "OFFSET": 0 + }], + "VARIADIC_DATA_BUFFERS": ["0001020304050607"] + })"); +} + class TestJsonRoundTrip : public ::testing::TestWithParam { public: void SetUp() {} diff --git a/cpp/src/arrow/integration/json_internal.cc b/cpp/src/arrow/integration/json_internal.cc index 2dc6b3611802..1e57be51b060 100644 --- a/cpp/src/arrow/integration/json_internal.cc +++ b/cpp/src/arrow/integration/json_internal.cc @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -1497,8 +1498,11 @@ class ArrayReader { auto json_size = json_view_obj.FindMember("SIZE"); RETURN_NOT_INT("SIZE", json_size, json_view_obj); - DCHECK_GE(json_size->value.GetInt64(), 0); - auto size = static_cast(json_size->value.GetInt64()); + auto size = json_size->value.GetInt(); + if (size < 0) { + return Status::Invalid("Invalid binary view SIZE: ", size, + ". Expected a non-negative value"); + } if (size <= BinaryViewType::kInlineSize) { auto json_inlined = json_view_obj.FindMember("INLINED"); @@ -1506,11 +1510,19 @@ class ArrayReader { out_view.inlined = {size, {}}; if constexpr (ViewType::is_utf8) { - DCHECK_LE(json_inlined->value.GetStringLength(), BinaryViewType::kInlineSize); + if (json_inlined->value.GetStringLength() != static_cast(size)) { + return Status::Invalid("Invalid binary view INLINED length: ", + json_inlined->value.GetStringLength(), + ". Expected exactly ", size, " bytes"); + } memcpy(&out_view.inlined.data, json_inlined->value.GetString(), size); } else { - DCHECK_LE(json_inlined->value.GetStringLength(), - BinaryViewType::kInlineSize * 2); + if (json_inlined->value.GetStringLength() != + static_cast(size * 2)) { + return Status::Invalid("Invalid binary view INLINED hex length: ", + json_inlined->value.GetStringLength(), + ". Expected exactly ", size * 2, " characters"); + } ARROW_ASSIGN_OR_RAISE(auto inlined, GetStringView(json_inlined->value)); RETURN_NOT_OK(ParseHexValues(inlined, out_view.inlined.data.data())); } @@ -1524,20 +1536,46 @@ class ArrayReader { RETURN_NOT_INT("BUFFER_INDEX", json_buffer_index, json_view_obj); RETURN_NOT_INT("OFFSET", json_offset, json_view_obj); + const auto buffer_index = json_buffer_index->value.GetInt(); + const auto offset = json_offset->value.GetInt(); + if (buffer_index < 0) { + return Status::Invalid("Invalid binary view BUFFER_INDEX: ", buffer_index, + ". Expected a non-negative value"); + } + if (offset < 0) { + return Status::Invalid("Invalid binary view OFFSET: ", offset, + ". Expected a non-negative value"); + } + if (json_prefix->value.GetStringLength() != BinaryViewType::kPrefixSize * 2) { + return Status::Invalid("Invalid binary view PREFIX_HEX length: ", + json_prefix->value.GetStringLength(), + ". Expected exactly ", BinaryViewType::kPrefixSize * 2, + " characters"); + } + + const int64_t num_variadic_buffers = static_cast(buffers.size()) - 2; + if (buffer_index >= num_variadic_buffers) { + return Status::Invalid("Invalid binary view BUFFER_INDEX: ", buffer_index, + ". Expected < ", num_variadic_buffers); + } + + const int64_t data_buffer_size = buffers[buffer_index + 2]->size(); + if (static_cast(offset) > data_buffer_size || + static_cast(size) > data_buffer_size - static_cast(offset)) { + return Status::Invalid("Invalid binary view range [offset=", offset, + ", size=", size, "] for data buffer of size ", + data_buffer_size); + } + out_view.ref = { size, {}, - static_cast(json_buffer_index->value.GetInt64()), - static_cast(json_offset->value.GetInt64()), + buffer_index, + offset, }; - DCHECK_EQ(json_prefix->value.GetStringLength(), BinaryViewType::kPrefixSize * 2); ARROW_ASSIGN_OR_RAISE(auto prefix, GetStringView(json_prefix->value)); RETURN_NOT_OK(ParseHexValues(prefix, out_view.ref.prefix.data())); - - DCHECK_LE(static_cast(out_view.ref.buffer_index), buffers.size() - 2); - DCHECK_LE(static_cast(out_view.ref.offset) + out_view.size(), - buffers[out_view.ref.buffer_index + 2]->size()); } data_ = ArrayData::Make(type_, length_, std::move(buffers), null_count);