-
Notifications
You must be signed in to change notification settings - Fork 1.1k
[Variant] Align cast logic for from/to_decimal for variant #9689
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 2 commits
c9a092e
38dfe69
084872b
8f91b9e
9003a67
253f0b4
ce05fea
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -531,7 +531,7 @@ where | |
|
|
||
| /// Parses given string to specified decimal native (i128/i256) based on given | ||
| /// scale. Returns an `Err` if it cannot parse given string. | ||
| pub(crate) fn parse_string_to_decimal_native<T: DecimalType>( | ||
| pub fn parse_string_to_decimal_native<T: DecimalType>( | ||
| value_str: &str, | ||
| scale: usize, | ||
| ) -> Result<T::Native, ArrowError> | ||
|
|
@@ -777,15 +777,15 @@ where | |
| if cast_options.safe { | ||
| array | ||
| .unary_opt::<_, D>(|v| { | ||
| D::Native::from_f64((mul * v.as_()).round()) | ||
| single_float_to_decimal::<D>(v.as_(), mul) | ||
| .filter(|v| D::is_valid_decimal_precision(*v, precision)) | ||
| }) | ||
| .with_precision_and_scale(precision, scale) | ||
| .map(|a| Arc::new(a) as ArrayRef) | ||
| } else { | ||
| array | ||
| .try_unary::<_, D, _>(|v| { | ||
| D::Native::from_f64((mul * v.as_()).round()) | ||
| single_float_to_decimal::<D>(v.as_(), mul) | ||
| .ok_or_else(|| { | ||
| ArrowError::CastError(format!( | ||
| "Cannot cast to {}({}, {}). Overflowing on {:?}", | ||
|
|
@@ -802,6 +802,17 @@ where | |
| } | ||
| } | ||
|
|
||
| /// Cast a single floating point value to a decimal native with the given multiple. | ||
| /// Returns `None` if the value cannot be represented with the requested precision. | ||
| #[inline] | ||
| pub fn single_float_to_decimal<D>(input: f64, mul: f64) -> Option<D::Native> | ||
| where | ||
| D: DecimalType + ArrowPrimitiveType, | ||
| <D as ArrowPrimitiveType>::Native: DecimalCast, | ||
| { | ||
| D::Native::from_f64((mul * input).round()) | ||
| } | ||
|
|
||
| pub(crate) fn cast_decimal_to_integer<D, T>( | ||
| array: &dyn Array, | ||
| base: D::Native, | ||
|
|
@@ -826,84 +837,63 @@ where | |
|
|
||
| let mut value_builder = PrimitiveBuilder::<T>::with_capacity(array.len()); | ||
|
|
||
| if scale < 0 { | ||
| match cast_options.safe { | ||
| true => { | ||
| for i in 0..array.len() { | ||
| if array.is_null(i) { | ||
| value_builder.append_null(); | ||
| } else { | ||
| let v = array | ||
| .value(i) | ||
| .mul_checked(div) | ||
| .ok() | ||
| .and_then(<T::Native as NumCast>::from::<D::Native>); | ||
| value_builder.append_option(v); | ||
| } | ||
| } | ||
| } | ||
| false => { | ||
| for i in 0..array.len() { | ||
| if array.is_null(i) { | ||
| value_builder.append_null(); | ||
| } else { | ||
| let v = array.value(i).mul_checked(div)?; | ||
|
|
||
| let value = | ||
| <T::Native as NumCast>::from::<D::Native>(v).ok_or_else(|| { | ||
| ArrowError::CastError(format!( | ||
| "value of {:?} is out of range {}", | ||
| v, | ||
| T::DATA_TYPE | ||
| )) | ||
| })?; | ||
|
|
||
| value_builder.append_value(value); | ||
| } | ||
| } | ||
| } | ||
| } | ||
| } else { | ||
| match cast_options.safe { | ||
| true => { | ||
| for i in 0..array.len() { | ||
| if array.is_null(i) { | ||
| value_builder.append_null(); | ||
| } else { | ||
| let v = array | ||
| .value(i) | ||
| .div_checked(div) | ||
| .ok() | ||
| .and_then(<T::Native as NumCast>::from::<D::Native>); | ||
| value_builder.append_option(v); | ||
| } | ||
| for i in 0..array.len() { | ||
| if array.is_null(i) { | ||
| value_builder.append_null(); | ||
| } else { | ||
| match cast_options.safe { | ||
| true => { | ||
| let v = cast_single_decimal_to_integer::<D, T::Native>( | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The original code hoisted the checks for `scale < 0` and `cast_options.safe` out of the loop. The new code pushes the `cast_options.safe` check into the loop body and the `scale < 0` check into the per-value helper. It would be safer to just preserve the replication (even though it duplicates logic with the new helper), and rely on the compiler's inlining and "jump threading" optimizations to eliminate that redundancy:
if scale < 0 {
if cast_options.safe {
for i in 0..array.len() {
if array.is_null(i) {
value_builder.append_null();
} else {
let v = cast_single_decimal_to_integer::<D, T::Native>(...);
value_builder.append_option(v.ok());
}
}
} else {
for i in 0..array.len() {
if array.is_null(i) {
value_builder.append_null();
} else {
let v = cast_single_decimal_to_integer::<D, T::Native>(...);
value_builder.append_value(v?);
}
}
}
} else {
if cast_options.safe {
for i in 0..array.len() {
if array.is_null(i) {
value_builder.append_null();
} else {
let v = cast_single_decimal_to_integer::<D, T::Native>(...);
value_builder.append_option(v.ok());
}
}
} else {
for i in 0..array.len() {
if array.is_null(i) {
value_builder.append_null();
} else {
let v = cast_single_decimal_to_integer::<D, T::Native>(...);
value_builder.append_value(v?);
}
}
}
}
If you wanted to simplify a bit, you could define and use a local macro inside this function:
// Helper macro for emitting nearly the same loop every time, so we can hoist branches out.
// The compiler will specialize the resulting code (inlining and jump threading)
macro_rules! cast_loop {
(|$v:ident| $body:expr) => {{
for i in 0..array.len() {
if array.is_null(i) {
value_builder.append_null();
} else {
let $v = cast_single_decimal_to_integer::<D, T::Native>(...);
$body
}
}
}};
}
if scale < 0 {
if cast_options.safe {
cast_loop!(|v| value_builder.append_option(v.ok()));
} else {
cast_loop!(|v| value_builder.append_value(v?));
}
} else {
if cast_options.safe {
cast_loop!(|v| value_builder.append_option(v.ok()));
} else {
cast_loop!(|v| value_builder.append_value(v?));
}
}
Note that the four loop bodies are almost syntactically identical -- differing only in whether they call `value_builder.append_option(v.ok())` or `value_builder.append_value(v?)`.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thanks for the detailed explanation. Fixed. |
||
| array.value(i), | ||
| div, | ||
| scale as _, | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why are we casting? Isn't it a trivial i16 -> i16 cast?
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. To avoid the overflow. |
||
| T::DATA_TYPE, | ||
| ) | ||
| .ok(); | ||
| value_builder.append_option(v); | ||
| } | ||
| } | ||
| false => { | ||
| for i in 0..array.len() { | ||
| if array.is_null(i) { | ||
| value_builder.append_null(); | ||
| } else { | ||
| let v = array.value(i).div_checked(div)?; | ||
|
|
||
| let value = | ||
| <T::Native as NumCast>::from::<D::Native>(v).ok_or_else(|| { | ||
| ArrowError::CastError(format!( | ||
| "value of {:?} is out of range {}", | ||
| v, | ||
| T::DATA_TYPE | ||
| )) | ||
| })?; | ||
|
|
||
| value_builder.append_value(value); | ||
| } | ||
| false => { | ||
| let value = cast_single_decimal_to_integer::<D, T::Native>( | ||
| array.value(i), | ||
| div, | ||
| scale as _, | ||
| T::DATA_TYPE, | ||
| )?; | ||
|
|
||
| value_builder.append_value(value); | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| Ok(Arc::new(value_builder.finish())) | ||
| } | ||
|
|
||
| /// Casting a given decimal to an integer based on given div and scale. | ||
| /// The value is scaled by multiplying or dividing with the div based on the scale sign. | ||
| /// Returns `Err` if the value is overflow or cannot be represented with the requested precision. | ||
| pub fn cast_single_decimal_to_integer<D, T>( | ||
| value: D::Native, | ||
| div: D::Native, | ||
| scale: i16, | ||
| type_name: DataType, | ||
| ) -> Result<T, ArrowError> | ||
| where | ||
| T: NumCast + ToPrimitive, | ||
| D: DecimalType + ArrowPrimitiveType, | ||
| <D as ArrowPrimitiveType>::Native: ToPrimitive, | ||
| { | ||
| let v = if scale < 0 { | ||
| value.mul_checked(div)? | ||
| } else { | ||
| value.div_checked(div)? | ||
| }; | ||
|
|
||
| T::from::<D::Native>(v).ok_or_else(|| { | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I did not unify these two functions, because if I unify them with a common function like Then, in the caller function, I can't get the value of
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah tricky indeed. |
||
| ArrowError::CastError(format!("value of {:?} is out of range {:?}", v, type_name)) | ||
| }) | ||
| } | ||
|
|
||
| /// Cast a decimal array to a floating point array. | ||
| /// | ||
| /// Conversion is lossy and follows standard floating point semantics. Values | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -72,9 +72,26 @@ use arrow_schema::*; | |
| use arrow_select::take::take; | ||
| use num_traits::{NumCast, ToPrimitive, cast::AsPrimitive}; | ||
|
|
||
| pub use decimal::{DecimalCast, rescale_decimal}; | ||
| pub use decimal::{ | ||
| DecimalCast, cast_single_decimal_to_integer, parse_string_to_decimal_native, rescale_decimal, | ||
| single_float_to_decimal, | ||
| }; | ||
| pub use string::cast_single_string_to_boolean_default; | ||
|
|
||
| /// Lossy conversion from decimal to float. | ||
| /// | ||
| /// Conversion is lossy and follows standard floating point semantics. Values | ||
| /// that exceed the representable range become `INFINITY` or `-INFINITY` without | ||
| /// returning an error. | ||
| #[inline] | ||
| pub fn single_decimal_to_float_lossy<D, F>(f: &F, x: D::Native, scale: i32) -> f64 | ||
| where | ||
| D: DecimalType, | ||
| F: Fn(D::Native) -> f64, | ||
| { | ||
| f(x) / 10_f64.powi(scale) | ||
|
klion26 marked this conversation as resolved.
|
||
| } | ||
|
|
||
| /// CastOptions provides a way to override the default cast behaviors | ||
| #[derive(Debug, Clone, PartialEq, Eq, Hash)] | ||
| pub struct CastOptions<'a> { | ||
|
|
@@ -2314,10 +2331,10 @@ where | |
| Int32 => cast_decimal_to_integer::<D, Int32Type>(array, base, *scale, cast_options), | ||
| Int64 => cast_decimal_to_integer::<D, Int64Type>(array, base, *scale, cast_options), | ||
| Float32 => cast_decimal_to_float::<D, Float32Type, _>(array, |x| { | ||
| (as_float(x) / 10_f64.powi(*scale as i32)) as f32 | ||
| single_decimal_to_float_lossy::<D, F>(&as_float, x, *scale as _) as f32 | ||
| }), | ||
| Float64 => cast_decimal_to_float::<D, Float64Type, _>(array, |x| { | ||
| as_float(x) / 10_f64.powi(*scale as i32) | ||
| single_decimal_to_float_lossy::<D, F>(&as_float, x, *scale as _) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If we're anyway changing the code, (a bunch more similarly lossless
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, |
||
| }), | ||
| Utf8View => value_to_string_view(array, cast_options), | ||
| Utf8 => value_to_string::<i32>(array, cast_options), | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.