diff --git a/components/experimental/src/compactdecimal/provider.rs b/components/experimental/src/compactdecimal/provider.rs index 1da8a835d6a..70ff4a7b680 100644 --- a/components/experimental/src/compactdecimal/provider.rs +++ b/components/experimental/src/compactdecimal/provider.rs @@ -12,9 +12,10 @@ //! Read more about data providers: [`icu_provider`] use icu_pattern::SinglePlaceholderPattern; -use icu_plurals::provider::PluralElementsPackedULE; +use icu_plurals::provider::{FourBitMetadata, PluralElementsPackedULE}; use icu_provider::prelude::*; use zerovec::ule::vartuple::VarTupleULE; +use zerovec::ule::{to_sized_varule_bytes, SizedVarULEBytes}; use zerovec::VarZeroVec; #[cfg(feature = "compiled_data")] @@ -71,9 +72,13 @@ pub struct CompactDecimalPatternData<'data> { impl CompactDecimalPatternData<'_> { /// The pattern `0`, which is used for low magnitudes and omitted from the data struct. - // Safety: the integrity of the VarULE is enforced in validate_plural_pattern_0_map - pub const PLURAL_PATTERN_0: &'static PluralElementsPackedULE = - unsafe { PluralElementsPackedULE::from_bytes_unchecked(&[0, 1]) }; + pub const PLURAL_PATTERN_0: SizedVarULEBytes< + 2, + PluralElementsPackedULE, + > = PluralElementsPackedULE::new_mn( + FourBitMetadata::zero(), + to_sized_varule_bytes!(SinglePlaceholderPattern::PASS_THROUGH), + ); pub(crate) fn patterns_and_exponent_for_magnitude( &self, @@ -84,22 +89,8 @@ impl CompactDecimalPatternData<'_> { .filter(|t| i16::from(t.sized) <= magnitude) .last() .map(|t| (&t.variable, t.sized - t.variable.get_default().0.get())) - .unwrap_or((Self::PLURAL_PATTERN_0, 0)) + .unwrap_or((Self::PLURAL_PATTERN_0.as_varule(), 0)) } } -#[test] -fn validate_plural_pattern_0_map() { - use icu_plurals::{provider::FourBitMetadata, PluralElements}; - use zerovec::ule::encode_varule_to_box; - - assert_eq!( - CompactDecimalPatternData::PLURAL_PATTERN_0, - &*encode_varule_to_box(&PluralElements::new(( - FourBitMetadata::try_from_byte(0).unwrap(), - 
SinglePlaceholderPattern::PASS_THROUGH - ))) - ); -} - icu_provider::data_struct!(CompactDecimalPatternData<'_>, #[cfg(feature = "datagen")]); diff --git a/components/pattern/src/lib.rs b/components/pattern/src/lib.rs index a094d351ed3..db98234cdb6 100644 --- a/components/pattern/src/lib.rs +++ b/components/pattern/src/lib.rs @@ -134,6 +134,12 @@ impl SinglePlaceholderPattern { /// ``` pub const PASS_THROUGH: &'static SinglePlaceholderPattern = SinglePlaceholderPattern::from_ref_store_unchecked("\x01"); + + #[doc(hidden)] // for macro to_sized_varule_bytes + pub const fn as_bytes(&self) -> &[u8] { + // TODO: Add safety note + self.store.as_bytes() + } } /// # Examples diff --git a/components/plurals/src/provider.rs b/components/plurals/src/provider.rs index 149dd5d619a..b80aee4b3fb 100644 --- a/components/plurals/src/provider.rs +++ b/components/plurals/src/provider.rs @@ -27,11 +27,11 @@ use yoke::Yokeable; use zerofrom::ZeroFrom; use zerovec::ule::vartuple::VarTuple; use zerovec::ule::vartuple::VarTupleULE; -use zerovec::ule::AsULE; use zerovec::ule::EncodeAsVarULE; use zerovec::ule::UleError; use zerovec::ule::VarULE; use zerovec::ule::ULE; +use zerovec::ule::{AsULE, SizedVarULEBytes}; use zerovec::VarZeroSlice; pub mod rules; @@ -482,6 +482,94 @@ where core::mem::transmute(bytes) } + /// Creates a [`PluralElementsPackedULE`] with an "other" variant in a const context. + /// + /// Const parameters: + /// + /// - `M`: the length of `input` + /// - `N`: the length of the return value which is `M + 1` + /// + /// When [generic_const_exprs] is stabilized, we will be able to add a new + /// function signature without both const parameters. + /// + /// # Panics + /// + /// Panics if N != M + 1. 
+ /// + /// # Examples + /// + /// ``` + /// use icu::plurals::provider::PluralElementsPackedULE; + /// use icu::plurals::provider::FourBitMetadata; + /// use icu::plurals::PluralRules; + /// use icu::locale::locale; + /// use zerovec::ule::SizedVarULEBytes; + /// + /// let value = "hello, world!"; // 13 bytes long + /// let metadata = FourBitMetadata::try_from_byte(11).unwrap(); + /// let inner_ule = SizedVarULEBytes::<13, str>::try_from_encodeable(value).unwrap(); + /// let plural_ule = PluralElementsPackedULE::new_mn::<_, 14>(metadata, inner_ule); + /// let rules = PluralRules::try_new(locale!("en").into(), Default::default()).unwrap(); + /// + /// assert_eq!(plural_ule.as_varule().get(0.into(), &rules), (metadata, "hello, world!")); + /// assert_eq!(plural_ule.as_varule().get(1.into(), &rules), (metadata, "hello, world!")); + /// assert_eq!(plural_ule.as_varule().get(2.into(), &rules), (metadata, "hello, world!")); + /// ``` + /// + /// In a const context: + /// + /// ``` + /// use icu::plurals::provider::PluralElementsPackedULE; + /// use icu::plurals::provider::FourBitMetadata; + /// use icu::plurals::PluralRules; + /// use icu::locale::locale; + /// use zerovec::ule::SizedVarULEBytes; + /// + /// const METADATA: FourBitMetadata = FourBitMetadata::zero(); + /// let plural_ule = const { + /// PluralElementsPackedULE::new_mn::<_, 1>(METADATA, SizedVarULEBytes::EMPTY_STR) + /// }; + /// + /// let rules = PluralRules::try_new(locale!("en").into(), Default::default()).unwrap(); + /// + /// assert_eq!(plural_ule.as_varule().get(0.into(), &rules), (METADATA, "")); + /// assert_eq!(plural_ule.as_varule().get(1.into(), &rules), (METADATA, "")); + /// assert_eq!(plural_ule.as_varule().get(2.into(), &rules), (METADATA, "")); + /// ``` + /// + /// [generic_const_exprs]: https://doc.rust-lang.org/beta/unstable-book/language-features/generic-const-exprs.html#generic_const_exprs + pub const fn new_mn( + metadata: FourBitMetadata, + input: SizedVarULEBytes, + ) -> 
SizedVarULEBytes> { + #[allow(clippy::panic)] // for safety, and documented + if N != M + 1 { + panic!(concat!( + "new_mn: N != M + 1. ", + "N is the byte length of the returned SizedVarULEBytes; ", + "it must be exactly one more than M, ", + "the byte length of `input`, ", + "to make room for the lead byte." + )); + } + let mut bytes = [0u8; N]; + #[allow(clippy::unwrap_used)] // nonempty: N == M + 1 was checked above + let (start, remainder) = bytes.split_first_mut().unwrap(); + // TODO(1.87): use copy_from_slice + let mut i = 0; + #[allow(clippy::indexing_slicing)] // both remainder and input are length M + while i < M { + remainder[i] = input.as_bytes()[i]; + i += 1; + } + // First byte = 0...mmmm for a singleton + *start = metadata.get(); + // Safety: bytes are a valid representation of this type: + // 1. The first bit is 0 which indicates a singleton + // 2. The remainder is a valid V by invariant of the input parameter + unsafe { SizedVarULEBytes::new_unchecked(bytes) } + } + /// Returns a tuple with: /// 1. The lead byte /// 2. Bytes corresponding to the default V diff --git a/utils/zerovec/src/ule/fixed_length.rs b/utils/zerovec/src/ule/fixed_length.rs new file mode 100644 index 00000000000..cf821d23123 --- /dev/null +++ b/utils/zerovec/src/ule/fixed_length.rs @@ -0,0 +1,181 @@ +// This file is part of ICU4X. For terms of use, please see the file +// called LICENSE at the top level of the ICU4X source tree +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). + +use crate::ule::{EncodeAsVarULE, UleError, VarULE, ULE}; +use core::fmt; +use core::marker::PhantomData; +use core::ops::Deref; + +/// A container for a [`VarULE`] with a fixed byte length. +/// +/// This container may be useful if the length of your VarULE is known at compile-time. +/// +/// To construct one of these in a const context, consider [`to_sized_varule_bytes!`]. 
+/// +/// # Examples +/// +/// ``` +/// use zerovec::ule::SizedVarULEBytes; +/// use zerovec::ule::to_sized_varule_bytes; +/// +/// let from_constructor = SizedVarULEBytes::<13, str>::from_varule("hello, world!").unwrap(); +/// let from_macro = to_sized_varule_bytes!("hello, world!"); +/// +/// assert_eq!(&*from_constructor, "hello, world!"); +/// assert_eq!(&*from_macro, "hello, world!"); +/// ``` +#[derive(Copy, Clone, PartialEq, Eq)] +pub struct SizedVarULEBytes { + /// Invariant: The bytes MUST be a valid VarULE representation of `V`. + bytes: [u8; N], + _marker: PhantomData, +} + +impl SizedVarULEBytes { + /// Creates one of these from an [`EncodeAsVarULE`]. + /// + /// Returns an error if the byte length in the container is not the correct length + /// for the encodeable object. + /// + /// # Examples + /// + /// ``` + /// use zerovec::ule::SizedVarULEBytes; + /// + /// let container = SizedVarULEBytes::<13, str>::try_from_encodeable("hello, world!").unwrap(); + /// + /// assert_eq!(&*container, "hello, world!"); + /// + /// // Returns an error if the container is not the correct size: + /// SizedVarULEBytes::<20, str>::try_from_encodeable("hello, world!").unwrap_err(); + /// ``` + pub fn try_from_encodeable(input: impl EncodeAsVarULE) -> Result { + let len = input.encode_var_ule_len(); + if len != N { + return Err(UleError::length::(len)); + } + let mut bytes = [0u8; N]; + input.encode_var_ule_write(&mut bytes); + // Safety: the bytes were just written from an EncodeAsVarULE impl + unsafe { Ok(Self::new_unchecked(bytes)) } + } + + /// Creates one of these from a [`VarULE`]. + /// + /// Returns an error if the byte length in the container is not the correct length + /// for the given [`VarULE`]. 
+ pub fn from_varule(input: &V) -> Result { + let src = input.as_bytes(); + let len = src.len(); + if len != N { + return Err(UleError::length::(len)); + } + let mut bytes = [0u8; N]; + bytes.copy_from_slice(src); + // Safety: the bytes were just copied from V + unsafe { Ok(Self::new_unchecked(bytes)) } + } + + /// Creates one of these directly from bytes. + /// + /// # Safety + /// + /// The bytes MUST be a valid VarULE representation of `V`. + pub const unsafe fn new_unchecked(bytes: [u8; N]) -> Self { + Self { + bytes, + _marker: PhantomData, + } + } + + #[doc(hidden)] // macro constructor + pub const unsafe fn new_unchecked_with_type_hint(bytes: [u8; N], _hint: &V) -> Self { + Self::new_unchecked(bytes) + } + + /// Returns the bytes backing this [`SizedVarULEBytes`], which are + /// guaranteed to be a valid VarULE representation of `V`. + pub const fn as_bytes(&self) -> &[u8; N] { + &self.bytes + } + + /// Returns the container as an instance of `V`. + pub fn as_varule(&self) -> &V { + debug_assert!(V::validate_bytes(&self.bytes).is_ok()); + // Safety: self.bytes are a valid VarULE representation of `V`. + unsafe { V::from_bytes_unchecked(&self.bytes) } + } +} + +impl fmt::Debug for SizedVarULEBytes +where + V: fmt::Debug, +{ + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.as_varule().fmt(f) + } +} + +impl AsRef for SizedVarULEBytes { + fn as_ref(&self) -> &V { + self.as_varule() + } +} + +impl Deref for SizedVarULEBytes { + type Target = V; + fn deref(&self) -> &Self::Target { + self.as_varule() + } +} + +impl SizedVarULEBytes<0, str> { + /// The empty string as a [`SizedVarULEBytes`]. + // Safety: the empty slice is a valid str + pub const EMPTY_STR: Self = unsafe { Self::new_unchecked([]) }; +} + +impl SizedVarULEBytes<0, [T]> { + /// The empty slice as a [`SizedVarULEBytes`]. 
+ // Safety: the empty slice is a valid [T] + pub const EMPTY_SLICE: Self = unsafe { Self::new_unchecked([]) }; +} + +/// Takes a const expression resolving to a [`VarULE`] and returns one +/// resolving to an appropriately sized [`SizedVarULEBytes`]. +/// +/// The expression is inserted twice into code, once for evaluation and once +/// for the type hint only. If this is a problem, save the expression into a +/// const variable first. +/// +/// # Examples +/// +/// ``` +/// use zerovec::ule::SizedVarULEBytes; +/// use zerovec::ule::to_sized_varule_bytes; +/// +/// let stack_str = const { to_sized_varule_bytes!("hello, world!") }; +/// assert_eq!(&*stack_str, "hello, world!"); +/// ``` +#[macro_export] +#[doc(hidden)] // macro +macro_rules! __to_sized_varule_bytes { + ($expr:expr) => {{ + const SRC: &[u8] = { $expr }.as_bytes(); + const N: usize = SRC.len(); + let mut bytes: [u8; N] = [0; N]; + // TODO(1.87): use copy_from_slice + let mut i = 0; + #[allow(clippy::indexing_slicing)] // both bytes and SRC are length N + while i < N { + bytes[i] = SRC[i]; + i += 1; + } + // Safety: `bytes` is a valid representation of input by the VarULE + // trait bound on SizedVarULEBytes below + unsafe { $crate::ule::SizedVarULEBytes::new_unchecked_with_type_hint(bytes, { $expr }) } + }}; +} +#[doc(inline)] +pub use __to_sized_varule_bytes as to_sized_varule_bytes; diff --git a/utils/zerovec/src/ule/mod.rs b/utils/zerovec/src/ule/mod.rs index b5f212029d2..578d16ff769 100644 --- a/utils/zerovec/src/ule/mod.rs +++ b/utils/zerovec/src/ule/mod.rs @@ -14,6 +14,7 @@ mod chars; #[cfg(doc)] pub mod custom; mod encode; +mod fixed_length; mod macros; mod multi; mod niche; @@ -30,6 +31,7 @@ pub use chars::CharULE; #[cfg(feature = "alloc")] pub use encode::encode_varule_to_box; pub use encode::EncodeAsVarULE; +pub use fixed_length::{to_sized_varule_bytes, SizedVarULEBytes}; pub use multi::MultiFieldsULE; pub use niche::{NicheBytes, NichedOption, NichedOptionULE}; pub use option::{OptionULE, OptionVarULE}; 
@@ -441,6 +443,8 @@ impl UleError { } /// Construct an "invalid length" error for the given type and length + /// + /// The length is of the input bytes, not the expected length. pub fn length(len: usize) -> UleError { UleError::InvalidLength { ty: any::type_name::(),