Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions cpp/src/arrow/ipc/message.cc
Original file line number Diff line number Diff line change
Expand Up @@ -565,6 +565,17 @@ Status DecodeMessage(MessageDecoder* decoder, io::InputStream* file) {
auto metadata_length = decoder->next_required_size();
ARROW_ASSIGN_OR_RAISE(auto metadata, file->Read(metadata_length));
if (metadata->size() != metadata_length) {
// The first sizeof(int32_t) bytes of the Arrow file magic ("ARRO") may have been
// misread as metadata_length. Check if the remaining bytes complete the magic.
const auto remaining_magic = internal::kArrowMagicBytes.substr(sizeof(int32_t));
if (metadata->size() >= static_cast<int64_t>(remaining_magic.size()) &&
std::string_view(reinterpret_cast<const char*>(metadata->data()),
remaining_magic.size()) == remaining_magic) {
return Status::Invalid("Expected to read ", metadata_length,
" metadata bytes, but only read ", metadata->size(),
". This appears to be an Arrow IPC file. "
"Try the IPC file reader instead of the IPC stream reader.");
}
return Status::Invalid("Expected to read ", metadata_length, " metadata bytes, but ",
"only read ", metadata->size());
}
Expand Down
46 changes: 46 additions & 0 deletions cpp/src/arrow/ipc/read_write_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2265,6 +2265,52 @@ TEST(TestRecordBatchStreamReader, MalformedInput) {
ASSERT_RAISES(Invalid, RecordBatchStreamReader::Open(&garbage_reader));
}

TEST(TestRecordBatchStreamReader, OpenFileFormatSuggestsFileReader) {
std::shared_ptr<RecordBatch> batch;
ASSERT_OK(MakeIntRecordBatch(&batch));

FileWriterHelper helper;
ASSERT_OK(helper.Init(batch->schema(), IpcWriteOptions::Defaults()));
ASSERT_OK(helper.WriteBatch(batch));
ASSERT_OK(helper.Finish());

io::BufferReader reader(helper.buffer_);
// Check we mention using the file_reader when we detect file format
EXPECT_RAISES_WITH_MESSAGE_THAT(Invalid, ::testing::HasSubstr("file reader"),
RecordBatchStreamReader::Open(&reader));
}

TEST(TestRecordBatchStreamReader, CorruptDataDoesNotSuggestFileReader) {
// Continuation marker + metadata_length = 100, then 8 bytes of non-magic data.
const std::string corrupt(
"\xff\xff\xff\xff"
"\x64\x00\x00\x00"
"ABABABAB",
16);
auto buffer = std::make_shared<Buffer>(corrupt);
io::BufferReader reader(buffer);
// Validate that we don't suggest file reader when file is just corrupt
EXPECT_RAISES_WITH_MESSAGE_THAT(
Invalid, ::testing::Not(::testing::HasSubstr("file reader")),
RecordBatchStreamReader::Open(&reader));
}

TEST(TestRecordBatchFileReader, OpenStreamFormatSuggestsStreamReader) {
std::shared_ptr<RecordBatch> batch;
ASSERT_OK(MakeIntRecordBatch(&batch));

StreamWriterHelper helper;
ASSERT_OK(helper.Init(batch->schema(), IpcWriteOptions::Defaults()));
ASSERT_OK(helper.WriteBatch(batch));
ASSERT_OK(helper.Finish());

auto buf_reader = std::make_shared<io::BufferReader>(helper.buffer_);
// Check we mention using the stream_reader when we detect stream format
EXPECT_RAISES_WITH_MESSAGE_THAT(
Invalid, ::testing::HasSubstr("stream reader"),
RecordBatchFileReader::Open(buf_reader.get(), helper.buffer_->size()));
}

class EndlessCollectListener : public CollectListener {
public:
EndlessCollectListener() : CollectListener(), decoder_(nullptr) {}
Expand Down
4 changes: 3 additions & 1 deletion cpp/src/arrow/ipc/reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1890,7 +1890,9 @@ class RecordBatchFileReaderImpl : public RecordBatchFileReader {
const auto magic_start = buffer->data() + sizeof(int32_t);
if (std::string_view(reinterpret_cast<const char*>(magic_start), kMagicSize) !=
kArrowMagicBytes) {
return Status::Invalid("Not an Arrow file");
return Status::Invalid(
"Not an Arrow file. If this is an Arrow IPC streaming format file, use "
"the IPC stream reader instead.");
}

int32_t footer_length = bit_util::FromLittleEndian(
Expand Down