Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions cpp/src/arrow/integration/json_integration_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1112,6 +1112,51 @@ TEST(TestJsonFileReadWrite, JsonExample6) {
AssertArraysEqual(*batch->column(0), *expected_array);
}

static void AssertInvalidBinaryViewJson(const std::string& json_array) {
rj::Document d;
// Pass explicit size to avoid ASAN issues with SIMD loads in RapidJson.
d.Parse(json_array.data(), json_array.size());
ASSERT_FALSE(d.HasParseError());

ASSERT_RAISES(Invalid,
json::ReadArray(default_memory_pool(), d, field("f", binary_view())));
}

TEST(TestJsonFileReadWrite, BinaryViewRejectsNegativeSize) {
AssertInvalidBinaryViewJson(R"({
"name": "f",
"count": 1,
"VALIDITY": [1],
"VIEWS": [{"SIZE": -1, "INLINED": ""}],
"VARIADIC_DATA_BUFFERS": []
})");
}

TEST(TestJsonFileReadWrite, BinaryViewRejectsInlineLengthMismatch) {
AssertInvalidBinaryViewJson(R"({
"name": "f",
"count": 1,
"VALIDITY": [1],
"VIEWS": [{"SIZE": 4, "INLINED": "x"}],
"VARIADIC_DATA_BUFFERS": []
})");
}

TEST(TestJsonFileReadWrite, BinaryViewRejectsOutOfBoundsReference) {
AssertInvalidBinaryViewJson(R"({
"name": "f",
"count": 1,
"VALIDITY": [1],
"VIEWS": [{
"SIZE": 20,
"PREFIX_HEX": "00010203",
"BUFFER_INDEX": 0,
"OFFSET": 0
}],
"VARIADIC_DATA_BUFFERS": ["0001020304050607"]
})");
}

class TestJsonRoundTrip : public ::testing::TestWithParam<MakeRecordBatch*> {
public:
void SetUp() {}
Expand Down
62 changes: 50 additions & 12 deletions cpp/src/arrow/integration/json_internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <iomanip>
#include <iostream>
#include <memory>
Expand Down Expand Up @@ -1497,20 +1498,31 @@ class ArrayReader {

auto json_size = json_view_obj.FindMember("SIZE");
RETURN_NOT_INT("SIZE", json_size, json_view_obj);
DCHECK_GE(json_size->value.GetInt64(), 0);
auto size = static_cast<int32_t>(json_size->value.GetInt64());
auto size = json_size->value.GetInt();
if (size < 0) {
return Status::Invalid("Invalid binary view SIZE: ", size,
". Expected a non-negative value");
}

if (size <= BinaryViewType::kInlineSize) {
auto json_inlined = json_view_obj.FindMember("INLINED");
RETURN_NOT_STRING("INLINED", json_inlined, json_view_obj);
out_view.inlined = {size, {}};

if constexpr (ViewType::is_utf8) {
DCHECK_LE(json_inlined->value.GetStringLength(), BinaryViewType::kInlineSize);
if (json_inlined->value.GetStringLength() != static_cast<rj::SizeType>(size)) {
return Status::Invalid("Invalid binary view INLINED length: ",
json_inlined->value.GetStringLength(),
". Expected exactly ", size, " bytes");
}
memcpy(&out_view.inlined.data, json_inlined->value.GetString(), size);
} else {
DCHECK_LE(json_inlined->value.GetStringLength(),
BinaryViewType::kInlineSize * 2);
if (json_inlined->value.GetStringLength() !=
static_cast<rj::SizeType>(size * 2)) {
return Status::Invalid("Invalid binary view INLINED hex length: ",
json_inlined->value.GetStringLength(),
". Expected exactly ", size * 2, " characters");
}
ARROW_ASSIGN_OR_RAISE(auto inlined, GetStringView(json_inlined->value));
RETURN_NOT_OK(ParseHexValues(inlined, out_view.inlined.data.data()));
}
Expand All @@ -1524,20 +1536,46 @@ class ArrayReader {
RETURN_NOT_INT("BUFFER_INDEX", json_buffer_index, json_view_obj);
RETURN_NOT_INT("OFFSET", json_offset, json_view_obj);

const auto buffer_index = json_buffer_index->value.GetInt();
const auto offset = json_offset->value.GetInt();
if (buffer_index < 0) {
return Status::Invalid("Invalid binary view BUFFER_INDEX: ", buffer_index,
". Expected a non-negative value");
}
if (offset < 0) {
return Status::Invalid("Invalid binary view OFFSET: ", offset,
". Expected a non-negative value");
}
if (json_prefix->value.GetStringLength() != BinaryViewType::kPrefixSize * 2) {
return Status::Invalid("Invalid binary view PREFIX_HEX length: ",
json_prefix->value.GetStringLength(),
". Expected exactly ", BinaryViewType::kPrefixSize * 2,
" characters");
}

const int64_t num_variadic_buffers = static_cast<int64_t>(buffers.size()) - 2;
if (buffer_index >= num_variadic_buffers) {
return Status::Invalid("Invalid binary view BUFFER_INDEX: ", buffer_index,
". Expected < ", num_variadic_buffers);
}

const int64_t data_buffer_size = buffers[buffer_index + 2]->size();
if (static_cast<int64_t>(offset) > data_buffer_size ||
static_cast<int64_t>(size) > data_buffer_size - static_cast<int64_t>(offset)) {
return Status::Invalid("Invalid binary view range [offset=", offset,
", size=", size, "] for data buffer of size ",
data_buffer_size);
}

out_view.ref = {
size,
{},
static_cast<int32_t>(json_buffer_index->value.GetInt64()),
static_cast<int32_t>(json_offset->value.GetInt64()),
buffer_index,
offset,
};

DCHECK_EQ(json_prefix->value.GetStringLength(), BinaryViewType::kPrefixSize * 2);
ARROW_ASSIGN_OR_RAISE(auto prefix, GetStringView(json_prefix->value));
RETURN_NOT_OK(ParseHexValues(prefix, out_view.ref.prefix.data()));

DCHECK_LE(static_cast<size_t>(out_view.ref.buffer_index), buffers.size() - 2);
DCHECK_LE(static_cast<int64_t>(out_view.ref.offset) + out_view.size(),
buffers[out_view.ref.buffer_index + 2]->size());
}

data_ = ArrayData::Make(type_, length_, std::move(buffers), null_count);
Expand Down
Loading