Skip to content
Open
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 22 additions & 8 deletions datafusion/physical-expr-common/src/binary_view_map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -292,6 +292,16 @@ where

// Extract length from the view (first 4 bytes of u128 in little-endian)
let len = view_u128 as u32;
let is_inline = len <= 12;

// For non-inline strings (>12 bytes), fetch once from the array.
// to avoid redundant memory access during hash resolution and insertion.
// Inline strings (<= 12 bytes) use u128 directly.
let input_value: &[u8] = if !is_inline {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm can we just values.value(i).as_ref() to where it is used instead in the else branch ?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done — use values.value(i).as_ref() to where it's used.
Sorry for overcomplicating it 😅

values.value(i).as_ref()
} else {
&[]
};

// Check if value already exists
let maybe_payload = {
Expand All @@ -306,7 +316,7 @@ where
}

// Fast path: inline strings can be compared directly
if len <= 12 {
if is_inline {
return header.view == view_u128;
}

Expand All @@ -329,7 +339,6 @@ where
} else {
&in_progress[offset..offset + stored_len]
};
let input_value: &[u8] = values.value(i).as_ref();
stored_value == input_value
})
.map(|entry| entry.payload)
Expand All @@ -339,17 +348,22 @@ where
payload
} else {
// no existing value, make a new one
let value: &[u8] = values.value(i).as_ref();
let payload = make_payload_fn(Some(value));

// Create view pointing to our buffers
let new_view = self.append_value(value);
let (payload, new_view) = if is_inline {
// Extract inline bytes from view (only for new values)
let view_bytes = view_u128.to_le_bytes();
let payload = make_payload_fn(Some(&view_bytes[4..4 + len as usize]));
Copy link
Copy Markdown
Contributor

@Dandandan Dandandan Mar 9, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it looks like this param to the payload fn is not really used anymore, could we remove it?

The three actual implementations just ignore the param:

        fn make_payload_fn(_value: Option<&[u8]>) {}
            |_value| {
                // assign new group index on each insert
                let group_idx = self.num_groups;
                self.num_groups += 1;
                group_idx
            },
            |_value| {
                // assign new group index on each insert
                let group_idx = self.num_groups;
                self.num_groups += 1;
                group_idx
            },

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed the parameter from make_payload_fn; updated callers and test accordingly.

self.views.push(view_u128);
self.nulls.append_non_null();
(payload, view_u128)
} else {
let payload = make_payload_fn(Some(input_value));
(payload, self.append_value(input_value))
};
let new_header = Entry {
view: new_view,
hash,
payload,
};

self.map
.insert_accounted(new_header, |h| h.hash, &mut self.map_size);
payload
Expand Down
Loading