-
Notifications
You must be signed in to change notification settings - Fork 2.1k
Avoid redundant memory access in ArrowBytesViewMap for inline values #20807
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 2 commits
dd4cc0b
87fcc3d
c8068d3
b00fb4b
d0d0784
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -292,6 +292,16 @@ where | |
|
|
||
| // Extract length from the view (first 4 bytes of u128 in little-endian) | ||
| let len = view_u128 as u32; | ||
| let is_inline = len <= 12; | ||
|
|
||
| // For non-inline strings (>12 bytes), fetch once from the array | ||
| // to avoid redundant memory access during hash resolution and insertion. | ||
| // Inline strings (<= 12 bytes) use u128 directly. | ||
| let input_value: &[u8] = if !is_inline { | ||
| values.value(i).as_ref() | ||
| } else { | ||
| &[] | ||
| }; | ||
|
|
||
| // Check if value already exists | ||
| let maybe_payload = { | ||
|
|
@@ -306,7 +316,7 @@ where | |
| } | ||
|
|
||
| // Fast path: inline strings can be compared directly | ||
| if len <= 12 { | ||
| if is_inline { | ||
| return header.view == view_u128; | ||
| } | ||
|
|
||
|
|
@@ -329,7 +339,6 @@ where | |
| } else { | ||
| &in_progress[offset..offset + stored_len] | ||
| }; | ||
| let input_value: &[u8] = values.value(i).as_ref(); | ||
| stored_value == input_value | ||
| }) | ||
| .map(|entry| entry.payload) | ||
|
|
@@ -339,17 +348,22 @@ where | |
| payload | ||
| } else { | ||
| // no existing value, make a new one | ||
| let value: &[u8] = values.value(i).as_ref(); | ||
| let payload = make_payload_fn(Some(value)); | ||
|
|
||
| // Create view pointing to our buffers | ||
| let new_view = self.append_value(value); | ||
| let (payload, new_view) = if is_inline { | ||
| // Extract inline bytes from view (only for new values) | ||
| let view_bytes = view_u128.to_le_bytes(); | ||
| let payload = make_payload_fn(Some(&view_bytes[4..4 + len as usize])); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It looks like this param to the payload fn is not really used anymore; could we remove it? The three actual implementations just ignore the param:
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Removed the parameter from |
||
| self.views.push(view_u128); | ||
| self.nulls.append_non_null(); | ||
| (payload, view_u128) | ||
| } else { | ||
| let payload = make_payload_fn(Some(input_value)); | ||
| (payload, self.append_value(input_value)) | ||
| }; | ||
| let new_header = Entry { | ||
| view: new_view, | ||
| hash, | ||
| payload, | ||
| }; | ||
|
|
||
| self.map | ||
| .insert_accounted(new_header, |h| h.hash, &mut self.map_size); | ||
| payload | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hmm, can we just move
`values.value(i).as_ref()` to where it is used instead, in the else branch? There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done — moved
`values.value(i).as_ref()` to where it's used. Sorry for overcomplicating it 😅