Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,10 @@ public List<Split> splitFile(
List<String> partitionValues,
SplitCreator splitCreator)
throws IOException {
if (length <= 0) {
// Zero-length files contain no data; skip to avoid sending empty splits to BE.
return Lists.newArrayList();
Copy link

Copilot AI Apr 14, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The early return for zero-length files currently allocates a new mutable list on every call. Since this is a hot path when scanning many files, consider returning Collections.emptyList() (or ImmutableList.of()) to avoid unnecessary allocations and signal immutability.

Suggested change
return Lists.newArrayList();
return ImmutableList.of();

Copilot uses AI. Check for mistakes.
}
// Pass splitCreator.create() to set target file split size to calculate split weight.
long targetFileSplitSize = specifiedFileSplitSize > 0 ? specifiedFileSplitSize : maxSplitSize;
if (blockLocations == null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,30 @@ public void testNullRemainingInitialSplitIsAllowed() throws Exception {
Assert.assertEquals(1, splits.size());
}

@Test
public void testZeroLengthFileProducesNoSplits() throws Exception {
    // Fixture: a splitter with the default initial-split budget, an empty file path,
    // and a single zero-offset / zero-length block hosted on "h1".
    FileSplitter fileSplitter = new FileSplitter(32 * MB, 64 * MB, DEFAULT_INITIAL_SPLITS);
    LocationPath emptyFilePath = LocationPath.of("hdfs://example.com/path/emptyfile");
    String[] hosts = {"h1"};
    BlockLocation[] emptyBlock = {new BlockLocation(null, hosts, 0L, 0L)};
    List<String> noPartitionValues = Collections.emptyList();

    // Case 1: the file is not splittable.
    List<Split> nonSplittableResult = fileSplitter.splitFile(
            emptyFilePath, 0L, emptyBlock, 0L, 0L, false,
            noPartitionValues, FileSplit.FileSplitCreator.DEFAULT);
    Assert.assertTrue("Zero-length file should produce no splits", nonSplittableResult.isEmpty());

    // Case 2: the file is splittable.
    List<Split> splittableResult = fileSplitter.splitFile(
            emptyFilePath, 0L, emptyBlock, 0L, 0L, true,
            noPartitionValues, FileSplit.FileSplitCreator.DEFAULT);
    Assert.assertTrue("Zero-length splittable file should produce no splits", splittableResult.isEmpty());

    // Case 3: no block locations are supplied at all.
    List<Split> nullLocationsResult = fileSplitter.splitFile(
            emptyFilePath, 0L, null, 0L, 0L, true,
            noPartitionValues, FileSplit.FileSplitCreator.DEFAULT);
    Assert.assertTrue("Zero-length file with null locations should produce no splits",
            nullLocationsResult.isEmpty());

    // None of the three calls above may consume the initial-split budget.
    Assert.assertEquals(DEFAULT_INITIAL_SPLITS, fileSplitter.getRemainingInitialSplitNum());
}

@Test
public void testSmallFileNoSplit() throws Exception {
LocationPath loc = LocationPath.of("hdfs://example.com/path/small");
Expand Down
Loading