Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 17 additions & 17 deletions datafusion/physical-expr-common/src/binary_view_map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ impl ArrowBytesViewSet {

/// Inserts each value from `values` into the set
pub fn insert(&mut self, values: &ArrayRef) {
fn make_payload_fn(_value: Option<&[u8]>) {}
fn make_payload_fn() {}
fn observe_payload_fn(_payload: ()) {}
self.0
.insert_if_new(values, make_payload_fn, observe_payload_fn);
Expand Down Expand Up @@ -209,7 +209,7 @@ where
make_payload_fn: MP,
observe_payload_fn: OP,
) where
MP: FnMut(Option<&[u8]>) -> V,
MP: FnMut() -> V,
OP: FnMut(V),
{
// Sanity check array type
Expand Down Expand Up @@ -248,7 +248,7 @@ where
mut make_payload_fn: MP,
mut observe_payload_fn: OP,
) where
MP: FnMut(Option<&[u8]>) -> V,
MP: FnMut() -> V,
OP: FnMut(V),
B: ByteViewType,
{
Expand Down Expand Up @@ -279,7 +279,7 @@ where
let payload = if let Some(&(payload, _offset)) = self.null.as_ref() {
payload
} else {
let payload = make_payload_fn(None);
let payload = make_payload_fn();
let null_index = self.views.len();
self.views.push(0);
self.nulls.append_null();
Expand All @@ -292,6 +292,7 @@ where

// Extract length from the view (first 4 bytes of u128 in little-endian)
let len = view_u128 as u32;
let is_inline = len <= 12;

// Check if value already exists
let maybe_payload = {
Expand All @@ -306,7 +307,7 @@ where
}

// Fast path: inline strings can be compared directly
if len <= 12 {
if is_inline {
return header.view == view_u128;
}

Expand Down Expand Up @@ -339,17 +340,20 @@ where
payload
} else {
// no existing value, make a new one
let value: &[u8] = values.value(i).as_ref();
let payload = make_payload_fn(Some(value));

// Create view pointing to our buffers
let new_view = self.append_value(value);
let payload = make_payload_fn();
let new_view = if is_inline {
self.views.push(view_u128);
self.nulls.append_non_null();
view_u128
} else {
let value: &[u8] = values.value(i).as_ref();
self.append_value(value)
};
let new_header = Entry {
view: new_view,
hash,
payload,
};

self.map
.insert_accounted(new_header, |h| h.hash, &mut self.map_size);
payload
Expand Down Expand Up @@ -726,16 +730,12 @@ mod tests {
}

// insert the values into the map, recording what we did
let mut seen_new_strings = vec![];
let mut seen_indexes = vec![];
self.map.insert_if_new(
&arr,
|s| {
let value = s
.map(|s| String::from_utf8(s.to_vec()).expect("Non utf8 string"));
|| {
let index = next_index;
next_index += 1;
seen_new_strings.push(value);
TestPayload { index }
},
|payload| {
Expand All @@ -744,7 +744,7 @@ mod tests {
);

assert_eq!(actual_seen_indexes, seen_indexes);
assert_eq!(actual_new_strings, seen_new_strings);
assert_eq!(next_index, self.indexes.len());
}

/// Call `self.map.into_array()` validating that the strings are in the same
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ impl GroupValues for GroupValuesBytesView {
self.map.insert_if_new(
arr,
// called for each new group
|_value| {
|| {
// assign new group index on each insert
let group_idx = self.num_groups;
self.num_groups += 1;
Expand Down
Loading