From d7b53ec340e15445b2c1f5f53e8a63f772daf98a Mon Sep 17 00:00:00 2001 From: Fangdun Tsai Date: Wed, 6 May 2026 14:24:11 +0800 Subject: [PATCH 01/34] improve colr emoji detection and testing --- fontique/src/collection/mod.rs | 25 +++++------ parley/src/analysis/cluster.rs | 7 +++ parley/src/shape/mod.rs | 4 +- ...lector_16_without_default_font-2x_hint.png | Bin 0 -> 7667 bytes ...r_16_without_default_font-2x_hint_skew.png | Bin 0 -> 8272 bytes ...ctor_16_without_default_font-2x_nohint.png | Bin 0 -> 7667 bytes ...16_without_default_font-2x_nohint_skew.png | Bin 0 -> 8272 bytes ..._selector_16_without_default_font-hint.png | Bin 0 -> 2951 bytes ...ctor_16_without_default_font-hint_skew.png | Bin 0 -> 3035 bytes ...elector_16_without_default_font-nohint.png | Bin 0 -> 2951 bytes ...or_16_without_default_font-nohint_skew.png | Bin 0 -> 3035 bytes parley_tests/tests/draw.rs | 41 ++++++++++++++++-- 12 files changed, 58 insertions(+), 19 deletions(-) create mode 100644 parley_tests/snapshots/draw_colr_emoji_with_non_printing_variation_selector_16_without_default_font-2x_hint.png create mode 100644 parley_tests/snapshots/draw_colr_emoji_with_non_printing_variation_selector_16_without_default_font-2x_hint_skew.png create mode 100644 parley_tests/snapshots/draw_colr_emoji_with_non_printing_variation_selector_16_without_default_font-2x_nohint.png create mode 100644 parley_tests/snapshots/draw_colr_emoji_with_non_printing_variation_selector_16_without_default_font-2x_nohint_skew.png create mode 100644 parley_tests/snapshots/draw_colr_emoji_with_non_printing_variation_selector_16_without_default_font-hint.png create mode 100644 parley_tests/snapshots/draw_colr_emoji_with_non_printing_variation_selector_16_without_default_font-hint_skew.png create mode 100644 parley_tests/snapshots/draw_colr_emoji_with_non_printing_variation_selector_16_without_default_font-nohint.png create mode 100644 
parley_tests/snapshots/draw_colr_emoji_with_non_printing_variation_selector_16_without_default_font-nohint_skew.png diff --git a/fontique/src/collection/mod.rs b/fontique/src/collection/mod.rs index 08ad5af76..ddbd53f66 100644 --- a/fontique/src/collection/mod.rs +++ b/fontique/src/collection/mod.rs @@ -318,22 +318,19 @@ impl Inner { /// Returns the family object for the given family identifier. pub fn family(&mut self, id: FamilyId) -> Option { self.sync_shared(); + if let Some(family) = self.data.families.get(&id) { - family.as_ref().cloned() - } else { - #[cfg(feature = "system")] - if let Some(system) = &self.system { - let family = system.fonts.lock().unwrap().family(id); - self.data.families.insert(id, family.clone()); - family - } else { - None - } - #[cfg(not(feature = "system"))] - { - None - } + return family.as_ref().cloned(); } + + #[cfg(feature = "system")] + if let Some(system) = &self.system { + let family = system.fonts.lock().unwrap().family(id); + self.data.families.insert(id, family.clone()); + return family; + } + + None } /// Returns the family object for the given name. diff --git a/parley/src/analysis/cluster.rs b/parley/src/analysis/cluster.rs index c2d6228e3..22dccb52a 100644 --- a/parley/src/analysis/cluster.rs +++ b/parley/src/analysis/cluster.rs @@ -52,6 +52,8 @@ pub(crate) struct Char { /// Indexes into the list of styles for the containing text run, to find the style applicable /// to this character. pub style_index: u16, + /// Whether the emoji with non-printing variation selector + pub is_emoji_with_non_printing_variation_selector: bool, } pub(crate) type GlyphId = u16; @@ -351,6 +353,11 @@ impl<'a> Mapper<'a> { } let mut mapped = 0; for (c, g) in self.chars.iter().zip(glyphs.iter_mut()) { + // If the color emoji has a non-printing variation selector, ignore the variation selector. 
+ if c.is_emoji_with_non_printing_variation_selector { + break; + } + if !c.contributes_to_shaping { *g = f(c.ch); if self.map_len == 1 { diff --git a/parley/src/shape/mod.rs b/parley/src/shape/mod.rs index cd451001e..abb6ef703 100644 --- a/parley/src/shape/mod.rs +++ b/parley/src/shape/mod.rs @@ -262,8 +262,7 @@ fn fill_cluster_in_place( let is_emoji_with_non_printing_variation_selector = is_emoji_or_pictograph && info.is_variation_selector(); - let contributes_to_shaping = - info.contributes_to_shaping() && !is_emoji_with_non_printing_variation_selector; + let contributes_to_shaping = info.contributes_to_shaping(); if contributes_to_shaping { map_len += 1; } @@ -274,6 +273,7 @@ fn fill_cluster_in_place( glyph_id: 0, style_index: *style_index, is_control_character: info.is_control(), + is_emoji_with_non_printing_variation_selector, }); } diff --git a/parley_tests/snapshots/draw_colr_emoji_with_non_printing_variation_selector_16_without_default_font-2x_hint.png b/parley_tests/snapshots/draw_colr_emoji_with_non_printing_variation_selector_16_without_default_font-2x_hint.png new file mode 100644 index 0000000000000000000000000000000000000000..89307d853f10fe5b54bf3cde12adedc023a7ba03 GIT binary patch literal 7667 zcmbVxWl$VUyCv>6!Glb2hlJn~APnvSf&{n0bqEmLg1bwA3>qK=eQ~!J3C`fd;O?$- z`F?EGy}PxwRr{m6pE}*u=TvvsseZbnG}RRd@m}Mhpr8;cDS|$|w6`xM2nXw>W-TZx zMM0r!Q3A>6crPBpM(jzoP$JG;Y}^mbvWvtNi~8dt$+S+%jrNs|?kPS5HR&4N;e1@V z$6GH!$R{$gD>}^z{Z#80Xt(0`z*ymg01m@Xuy-(@b5C5M#akqa5Y~S6H`Gnc=owas zWCEk)U=@DXp)QvH2ZF`>zmNI9AV1!cv)wYS1@*VH)x%6`8&--sZzE-Phz9wpJDzga zHU)zv)nhbgIL-}*Yll^xf}MBhe_wv5Jd%n@PnUK5-n9ZJf4$+kz996}ket@&iADUG z-QFo)10jq2z;NGfC-{2n@MH(XY?KqI3Tyj&iNUS{J);;pmArCKu{lZ%U9|o_dq9?E z5UKslx+yz6$!d3&_4P7I`Um6aJ9}>Ti6&*T{xFg2m;egMxtGAT-Y>|GlGx53e00~S z-1ThaeLQ14EBEG~riO<^$H(t>dry9Dn@uSq3{hO28ni6AWOjxujEtJ6b{ia;SS8GO zr0ae;^^fZ5&J#{oL)qf>4w5#k6*JL40wwS;XWc>XE8poo4TQsZ4LqcUF48Qpj;GtKIOd8K&unN_4G=!6z;JOP6Gdi-fu4Di*Pq7SXol!Xe5IO 
zE2E;ELd?um%`}S~fi6%PpHxtne)og)R)Wt?I66f(ml1|%yrWA@q7zAuX{2NHJ3+iT zCcbN>Jk9J1hg<;K&+{fY>O#I0iJY%MI;NX!&rlG5Jnof2a(@L>-l@R3D@ z1g*4Tn`iIZX^b@Bf#k^1B3-Y{I4~VHt>JciFiz0eu_=?L+uiwhHlw-Y_7#3yg@A+8 zt0n2TxqvPzbeshAHC{b(BeL0K&I8RWubM@sWCA8k`PQ&Qifo`}Gwp12O#wN)*tSEX zvUR3Dlu%SYf=cb|IMdM;WI!=(da1%>f_ftAWHXs~d?k{Ae9|tO?5d|kNn~eqZd5#3 z-h@)u7-h9zcXD|b$3Hz&c(u^jo*LQjdMI3_(;A3?=h27eu_{r@BM>rz{^% zLc|N4TbPp>)RQ>P2vR?=ajCPn=U@dGC_Zswu$1vr%wam1H~EGl$?$rHOf$2DdW=tgz}$nZ@ah zJ&Vl4W++hyEk8xbAh~4uITc7IhyMV znSw>%2+Iz-L{k?5HbP2D;)P1|q}@z0dKbs#Z}~gpY2Py$llaw#2$E7ovYJm`-Ba*0 zArl9~{z_&>+WilKl_q7qlFBE|>w3*TOV}g;b=%UE$3xG2^XYkYc^L?ZUr1%yB*G0) zQt#~m;MwKwZ)LE)dGil#wL9T)G>g#!CQBsj4ZUT!M8f2^*stL4B4Z!B_}G>3W37wF zQlYIU1}5?1?tJ#TH$p53!LxZe@EZhxi*CPNGhw6fy5;Z5_SKiwks%OgRiH2)R6q$F z-6R=z!J(_mS4V>SqYr`SctQdv@QG@ix4_l&lnJ&Cj~o=0Gbq`$jkWQ`qbp`WP^MZv zZqhw=$*2!=uRlcSOXl6g8!pz7=HeBeqr7nS(cz)WngNPdM9}4OB9pH*gLpgzWZp3suQ!$oX!`6(>H1$dUJcop>2`@v zN5gwi+ROzzcLIZM6k%4y@Q}iGb9`_J#a}mYu}5PvZa}~s_<+|R@5<1R3`EAMtmTHU zfW1?mszF~K2&otNyDrri9>$^ueeD`2W|8-o#l-xD((5XJ!$ zrZ++evk;fzdhl%=NyY% zq=~Afb~G^?q0NJSil3Xq^t?AkbBqIfuY0O zxjyt@9@uB^=|KXbV7m44khVFtl&~^wXe!efS@>QO9~}y#PL1M6B{Clnu6M*AWfHX+ zDusRnoZE24fhu%|GdL4t<+9T1#jW}0bL?a3Y#xwV@o`(Br1(%)a?XdsnMiRz$lMN0 z;^%hzMgs7TC!Xl$Js#XwRrk~O$ow{Y zz#E@=F+$dR=K`{aw&h{x?O$K4ywzn8%|N-9Iy8K@s7jqCw|yTl}~uatO^I zU*5@3n#QEX_lD3G`NBhz>sgs5N&~wk4%}*rkad3Q8k#Tq};E!T|qCPQ&5h zcWh6U+6lrq1KBIK%rzc@>>)%(6aG*UF@FF1x8G^DsVqn&t%soLse61b9~>eqTGC2f zcS$vgP+&fGtF@A9`G(Qy#};slllc^RT~;6J9RIVHf>Zg-#9TSFkzTXNO{t;=Qb}Q3 zqynm$L8UzN=A)tvEdsfrzc^FC1$Jj>c4qk;n9Z?G;OF~7pXWU?)9p&34)waWok5h_ z!i4tG3m28e_^9mnu{}ABft%LEMQu;X2|uozkdp_2?axM*{zhKMOiz@e+HJqe?iJ2T zq>F!x^_5Y$EwVZ^C{2+SGbK_*{*uy--Obm*00m)1CDw;Ks}(NQ?QRRaYLpfbiSZ&w zNK12sM}`IE^F1zS)U38A^)GpyKT@uK=ZgR6!e{RoGRxcX&U)2o!1b3^LtI9`)dIiG zI1*zk@pH^6R2~J7<$>79`a4)0?h$!;D#(7t#-=}q-ZxGc4ZNnky{Ll z*r`jTkdqup-^m9)wcrgX)LiW5$QO1XpF7yq+u#xZ(35sB_n7m|AK4D>?91BLdc$F2 zLDD7eO786;ky-V8S<#Ryfunx%?rJsP#@U$?Yw17edd-)i#VH1_;L 
zj&2HsD$cDa-)fBSVWZZ8Fu-X(c0ac6Y1{4}6VkI?bW86Yku0^0(ovw388NWScf4X! z(!$QAIa+I>{S>FvefE3fDc-D$W%JQgLS1@c=+o83u~yr^lQBK_RF+*7=!dS{Cw;Yn7S@^`aGYI9m6l?sH}9M})D%8`z!W)^)-!TbkF>K|k#=y`jTOLvwfPnJ*9e?W1~q9q~b`RvR*g z8%{d&2CPZ{28IrKmBL533#6Nn%+@APSNzg<9hJjF5~rSBAgEj#>Fpok#>< z&p0^!228TkF)$VgLT93Xjc&Bd<`(msYI&VlL^@@YoO=8iPbgT=OevdNSo)(f?;n`K z%KTKVzRM>LL9?X!l>8MNC=8fHpkM%`F%nQhZi3!>f04EhM%$tr_^kg0;ZHR= z?z2v~8yTu`mAMQnM{k+hR*|^z>-89diJt%5ZUzsE_}ONQXd6MCdbUR_7gP3&C^g`x z!4w+HX&$S{*VPws9F(hx>$L2@6U#8)_Pl$lnx>z2Y2FPrYCRKznH)%sHs(Jw7M5Tz za@SxLcE=#NcSRWO*!lVN@#UNks^F%F42F?8n4Z|!r$IN7pCaRv%7V+t{*Z#~+FsS` z^^I-l$TUCF{v-JPhK2+n{jqCY4o;rh90DEpxY03kTVm!C^vqz7_P!r_cl}o|YnjSz zx8qtl|tt8iQMO^m|CUSL;gaEdy>c@C_p?7=yZ?swHaKFvod$^}YfXLV*)WMfd#ZkuK{_?T;Z z`kj+=PnG@q2)U0ic>a&RNEv{~Q}p1#{;R-IP34EAc`8bf^uN*=e;6a*Lu)l`^1}V% zYdfV=8={hJUhHa!`nA(qwy;m$t<2CdTbPO$y?OCoR&ukXn%H#d@gnc|*%Plw#Or)e zau{ieu!HlnSq#wM1EtH%xd`=-WB{T{4DrWGb&j)2c-C+T!{H8=f-LT6bN|O%%bTun zI;@Et(4Y-vmQ%ujR_g|TO0U5U7a4y!a?mqXC{VZ602WZIiAS|Nk)X?IOYu^y~C6-#&)Py%hq0aM$e>IDylTfCu_ zM8m!oOW($i?j`{%ccBoqp*wWzMyBfM=C4&pG_cY_M8Q7T$_z@%GxpUN!`AdZwjo=^ z7$`eo{vL$2qi&GS;#SRsTlrZ zdoLkTvBtg*7o>{3oxZ0}z*@H;ar<)9mSmgrNUOm}UO(II1sjqJoD=!6pSlcVhfFan znnrO7G)1(pzh_A%1k0qD(Yqp)WD5zvJw4QghM36k++_;Hy)ZG{V{k)9jAmac!xQ}@fZlTP~ zlI&ZPr8e}nncHIDTJY>eiy$tc{jgl{Z+LAa6_H@cD>2f%8YLXFTs-0juX_?*R$H3#CSFD?BNU81i}g;=q{h$PI)jzGbd-pM=0ST0pz>VCWjLdc$ z8}IN|<;5?i@!7^uUZ;K+k>$|EHid*}@cT)+i|5vPpC~S3fw{$^qNXrdf_}f;bTBgm zR<3;IQfj<&E8i=nS}`DYIJ1_+r5?c(j&32t{8k5Fm)FsbcO|?Bhc3Upiy*Q({06=# z_SkW`za!FkHq(#rb1%2DVSRZkELee57BSgtc_a(Qx~iHkg+6rrO}O?;@ra6)XCa($ zq9qj(?rNoF{%C}o{01t+I1!>q=N|d0?Y(EU=reUGIb8P*0BS)-~Qs;Y9biLG7A6+3RsZs(jt6rgr`&z12lT zt*{lX-#^T87KU|Bm$y`02C_Uk@a{56pWTSI6Xnp)fIkBrTR@M&4O4^>lX5U6sF9?E zgu-q16`pYf84E%p5{s_Ws=IkgYX@I~j zn}P8^i?wizT2`JRH2pb6gmtlrdhTRaW-t}So9{|4g$BMPvA4>SReiNjrYM?KEQ>|Q zWw-<%E+ee4&NKBc3s~so5@gTV@8upK&-KLs$gIDE_+nS(vH>L&k~v<{X_HDn32Z_! 
z+R*-V)0}=(^J%-`Gv=?+#T_$KlMx)H=GM);W{z8MP%=EwQjT~)>`#nItqZypViT>uT-X#ZerVcoJvcCHZFI}6|sJ* zrXz*e$D4U8funbCoMR^q19A7})!P=8bOnZ5RWjQmgwd--=h2po^1Aza!8eg_eEED~3x(%S*i*1Lh!wu&coj6T+ zz-xEd%DlcvDKkQ*MYYbe8B>zm`IGgtTp&iVz|d%a{U2|>yXRK@Y(Be1)CJch_K^vt z=x7VgcTfRFG?Cn2r?460)+#^6w2V}ra5o;#_9UzOE6CTjq=EY*Zw!Vu-K(~rywBu^ z>k@EZ{Vyx+8emZ1J)D-=W@Gi98@wd@(9yd*HJz(!a&@~en_pThxpu6-{`yPX7?IkU^J4?4h;cq0`mnPc5={7i=j#x-X)+Z6 zk`zD;XfVUv8Xei6v?ZziHMMGfwOQW#oUH$}hhGm=XlY@QpanbwE@?VHUL7qZ+R)58 zo*S0qjmsi^yNVlEhBrMsyByR=)CV8U;#cqBeD0H*vyM4!-|wDj$H$Q5q4rUA-4}Cg zuNcIjq8`*r;P(ptIa83g{dM#9Kp4R!9NLa8p^RZE2^Zl+$5JD=Zh~M@w}x14c*Y(z zWjQ)x6$}0+8_pyB@@{JJ zR){;=B-&!CX0HV?a4LBdD2r~Ab1QDDFc6pm^uNQe zAJ|@Scgo2+;!t)|?#%-u6S%G=jg!6B?oID?h1#A>T}zy@S_n>_&Up3RI#i&szhL)? zEr4V`;xpD5u0$mo@>N&)l>rq+UpUt#P?8 zN#&V36xI+M<|T2t&ZhrbqAwh=ewA-AGP6qf3m+Bu1NLa_nE8M<^w_((g0^8{#2>sH z>HRI;{$NS8?bwox|L+3AXzaMRbMWH{z4H6hO)91ZJ9W7-dd|G?}06?UwqM-NqzI=QlKsb*h z$A_{?0Du{+svvI=xUfGf>8@`}KYo1Zr==CFY97K8yWy#$K&k8F=R?WYDX~-1Am?*b z58Y*34yCapAl9SfZ2SJKZ?wG~m5E|Q(?{15Ghlko9OA-HTBNM+HMF+6{60#8S*qP> zIvv(B2`w@s)dhW2ad^KBumf}d9~PQUw*OioO!xbBkK`e6yFAAJl+ ztsE<2^&8Q%Nj|(tgeYE6R#}r`cjIx+XPC@LT zcmI@N2SR!aDCwjctT&e@eF8kt<4ep<&l6C}dgs7xnjB(h9Zf3*;Jg%afqFcw$){4nATPZm~6+%78f z{p)Mcgn5$q&&!NqS?HJ)6yd^+sIb+=U2*o^1oj+HvXX+HzGU8A+1KM<=!WhRvtK zK67jLPr7%kqW_|0?cmomajwrf$pGgwo|z?JC`-3YB8~TkL#m+Ar4&nGd}?gI*X9Y8 zsyBf-VO7G6Ns(?a%Z@LoP;MaTCsW9eXFagg2_))gB_hztEI5 zrj=iDOv`8HVcINemaLd~BRIV7@%rH9f%IgHu*?wvNIaUcvo4~FAUTr5R6W8L=)aM= zczusX&?(*VDipT|fMwT&?!H3;MR=_MTx3xqvOhTvj(Ej2(6POYRIWu<*jJ=y2L*sQN#o|G;AyoDV;_%Wmn<|h&d&l zfH1yNX#=tkx|TtFYGP1aoz%rrh)E!PnAAL+hDF+f-tv_ON#6YdxU;O4=#OE*5gXRr z=aT2#Ge)_JO~f?qefHkJbH3b|X!r5UAWAf^s$evRE0H+`-xC20K;o&_Sj{Qah%u$) zh+*+$d^JwnY1KDbi~u#aBg{+_`yUv3S0I*?h*c=1-F~VE*xK{Ns;E3msO-Iv#a9l? 
z^xUOEW8M}ImuaeU7UB-_^WyMBqapM%F8uv3y9&Qo)QaXxAdFZpjmBKg=XNvH9!dCY zpOQ%L^z-h65N#&48Sq#;mVt7XF^~`Ou3p(81td+8`N?{eO8^ z=i^;@{CDb%)8!Ut457ee(LKV91B_dPik~aZCt&IZ4`=C0^C_W4X_@R2B8B$ef6$t4 z+`nTgths-OkTkagYxAZa!YNwUJAr`aH%yee6=^~^Xf`8vsa+j7UVWC-n%L8(Jywvr zk6A+(hs?={wXZt^><3RnWUy;?jtWGl7}#3A!)NUICFuNR0Z47 zykd-|us8eGEy-z~C)BMyF_lb_v#(z59~P0!gochNRB5+`i=C7COl;n z7~%k}uM=|8ZGhQS62IYEdTH1#hWrX80b{oBU%U&t>@fZI4vU2^OhN1mz~&LCdxZ7( zJPT}ulc^btona@@br6l5pq)8qj5xvR?K*Z%yAq$o`x$}EBYIIwjzrU9-P^{C1PKJx^uxm?<6xLYi&3I>kBCGS?QY`S}HRmwn@M3^ig+{s@xZ?Qn zk$q!u_BmJG@LN?3V604+dhw0|tA+ri(VtA*V`6{QOZ~QR%-=fhoJ1hn5y1nbs??`v z+|!+LR=?Z!;8@DSpxgT1-aop%z=#rBhQAyBv1*e-^lYK-fP#++f(T2xzf#_L zf5X`K&KRUEt#mnIY`eageP*A%bVAt|6uN3#Lb~i~yGdQlHu05ZD3@#*+_)?RbtfbD z!FkN)qb8?_7q;)e0^xmr5P0&by7z$8$rKk)SZd|{o-*|6iiBDD(1OO?vL$&+>Y7V1 z0D!Xim%wZD;y)*Io9cZd(pD~1WkRRIC!L05&)15M#Ln-Zsa@W(O~9jZy{~WT#>FRd zt*?uqJhdJg)7!!ly%EIPn?!KAbSX{U_iT+*unYIjds~eMEF+S1c^boyYrp6M-#$Y@ zGoeb#c%p5T>Re|NPupg&)$Xygn?X`OR8mZ=NqNu_Rv0L@g@fesOhfjjfh-pD9@!^+ z?zQpB+RJCXOF8H&V|dP^CM8+PILyf!8kMq0`W{kB=?>yoC2Sm2Tr|_;TnSn1cW9Lw z#kYPEXmi+b^hr`CPR^%%1%49Gc_o56*+2dDZnyWPp%=rGneh>Z86-9}JtU#1m{Uvd zREv3Yob1?EMZ0uUG?3k(I?md=WWt{wGs|U$bF3nN;I+Xs;Il2!w;Py+U>1jn0hV&P zKn&3Xmal;F+tAFv!Ls>T#OdFBjV`eVEf1?-9(3BnmDX`sYM)F*Z5xg5vp!&hHM24@ zlkxCGH36cZT+AiGh0DKvxjM8yy%qBc5i@3%Jm~k`>oJ*P{`>EneUVcN5zhj`zw8bK zsjY%x8{jnv!>UEqs?CP^sG1f^C~yNoh6+z6Rkk8^Ogw$Yn4<)11mkKnKDcFl{(v)s z%bgBmeLFmo+*zhbbs)9&i{$k-3IA&6`_ZXG^r`boqAvzaO+m+mV#EB9?|3Geb%el^ z_dE_aDF)IXXBkMSTW#GON{*>P3;eGtEi+t4_Vp(7X$d*AywPQOJr|Q6|kRJ?6O{&+x1am)a>{43R>Rtq6JFEfM-i(!TAyswQ6?9%; z5gkoJ*MXRIWBSCq-JK4Mk;Aqp5!NJ2yQ?o`?` zkGMT$X(-sK2NHO_lxt#dpWM##vzb4sG_n|E6s<}|UPi9MKJBZpaA;Rb`r|Mnl}u~% zE1+&{l!b%1NtKCC`7wkPAnv>%EjtC~+ozi+b1oPf`M9^iHtR%d8%qV;D zWkXIHni$a*qD}2csoOY8PeD#+Nw1PxHef6xk3EjMsfj^vK!os!xY+L7QjqRDupg^w((wsF!bbwnLD|1Y~40xcj?o*cd<=bw?&f_yM#Dh zyXP*CCc|Sp4&btNeD15YvAU>}+x-slmfInn!s&`AK~ybi#gpsz*)9i!yBl~j=Y+cM zh4QtU4GhyVvf9v))Yg^=A%ThivVRv>grRa=B(v>k-2u=P3Y!Z 
zS(}tvlUA<&h+>U?^t5=tN{CbH$}Q&lOR>d1|FYe-r)GYZILjEA@y0x4Kb4NS4GoGD zvdLMVz_`jTO1D^IZjbp>0Q8sy$2_74jzrj-)JMFY-2SPx`mq$o|3Q>P{PSWt0$P2AWO{9yFHpy6z`kym z`(!xJCB*lf4EZ|s*iGum(LkZ~x?r=pe7Qj#I)kF9l8&!=%#ddx3YGyQO;=!F@QWL5 zdEakk*?j^mDI9ZE7K}KLDqfs{)^ZxSQz{iu(w1Jnm8dZp%8Yp>}i3PY>ZQQl+SD3FnWkAUUo6|FN{O z^~>#PZ9j3`j!)6Y6U4k*LvQs6>!PS-H#I3<=($$h&(IXj&V63Fw~NUK7k!VSf?K%kwZc#xAo~q@)de zyAQ;e`etv&_hHx+(Gu}UW9!LTJ8U=e27cWhnNQ})e|3v2Y7Tm~@j2PQFxc)*xzW4F zYIkyOUF)~Em^7dFr#6G!3$D*{1IUy97_?6yJ;bhVpJlf`FH37Qog@C~QW%m8AE?cM z9iJE6V}MT#GhQFu@HdPar-ou~>iy3su5}009VcdW)k^It-7w%?a>aUOGDa0$T_Y6? z*w6gaI5kWPEld0Z;*M@vyl1q_lUokc^KQQJ{>hHJlV1W~Gb7|+hqEUqGIC5E<3QTbNVs!Ae z;T*g+?O)(GC5|WFcU#ZeDmI$<&J%)j<|{u*)s=WEwkKPAVeSo7+{`UN8@KVQ&o-cY zLAyh*#hWg_YHvA$OihjIiq$A6=?a>cgB|oUt^*VQsbuKYtLXfz7L%scY|iQRmvOib z%>ALqBK4xXGy0R^xXlTuo{!t>`A3==vRi(4FkMUy;JxadB)=X?kIQL_#-^}Zb z|F505sc1+gsM`fF@w^-_xNfBa=4i#wZ7F&<(;&(X1T{Qq05&O`v(SM!>1L9Iaz%td z-4Q1A5=nk@_?$d8^{N#8eLc3A(v!PlQ5hZ&f%j(_| zMQt^mgN@jrmiI@zT();8`Mgs>KAS;a?v??YuADidUMxETs?Vi%`2-9kf?uRQG>$j* zTk-W%|C=GO)MD~(;qM!;36ha)HeUu7JOyw8d!w|c-PbRQMq90)5`&v09!ckO^4YL8 zETlxfSc=cDWp#R4qtYM5CiMcD0RFp|!C&@1wf`DKNXEXCj{n=L0tSCaYuT2%X!>4M z6!8I5(cw4fc%#<(OH|WCtbd$wM%b?v$pBEq>%)ifz!_)hRq-qAg=ut)7{NH0-;XmklEm_zF{h3b+nd%kTXh3KP8X7(MIpFw?;=<=-q`*YGHoS0LD=b4K&Z0H(LyWN-a}<}( zCX|~@5YQjPiOLvNI=_)G*K#b&{CmV&AoYhe>ZK~_Wz+zL3QHm^PqM-Fo1#J=DMS34 z-av$(jR%zvwaupKwQT|00s1kJ#AC7bOb|e%D;Cx=#(-;Xs=^fJLZK}qF!r@e5SQe6 z)1h2$?kCB$CRqlbOFDt6Fy((kErwR1@R;E#;h@WcECkfdar|TYo0j>H@M9BqzvPYc zj@-a2Zfo1CQ9iUDBeeKFKDV9w>%X_S0Zl z)H9JIZq1}Kj@c%%eDgmV9u0zREnnN0*#lM#9L86kE(TCli%7))oC%=>M$tlRdS#z6 zwIo2gyf|!1l|cgyiguHHuo_>~pxWngNXsnZyT>xB(gIb-`BqqLc?lVC)wA0CxzGE( zGS~PB@t46Ox-Y4#&PWvWcAwg}j(M{&8En#*MIax*bQPOhIowO=R!D`RU*5K6jyuA6I~;|QOTWMF;@^FWXE-(Wd^Isl zs~TP#!RnI3StvMalV?)xud7Pt`9w0TU_OX-yw@R4N`sIU)|K`HFBFholBy-}e5Mb<7>5ceiG17~?n!*q8rF8?k2B_ffeiQ~D_KiM*;0TCzEkn#0>u=Y5U- 
zieQ{uYKvJL7e)eA8cF>lwl8;OE5RLseG=1{X@$4;D1qA&O|_U)P>tl#mT{FnE@VmX|(x6D<{QO^Xc9MrEIn@Io-2?iK2l}4OO3J>xU+^|fm1UZV{O%U$``4V6~=eC;(OolDs)Y_^%YM2{3ZiyjI$>7=qVrg{ha8wX?p+A z^qY8`@N~P89gft?jkLhNPxljt8TkqRgncf+jM(W~FAnM+qSAJOoshCS(pp>_`$oJ@ zN(43=ED&UB(uVmcXw6Jf!PXF?f2gwv`E^xj6=T9)p}KkJ+f2{!ubgpu?cB}4zm=gj0!gFX$*j600U+BDoXXbOgHV-_j0gX za35B`uzYd%6ZAjRQ}@ETD_aHrRHBeNDeY+xGMZ#w#p3k5cQnbdl}Y?SKjMl{aDTC?dyPi+Gt>+uD%UKkP-Y_57zC_l87r0}_75lQ^CP1M`d>hr)Cr(ylQa-!uczd3 zQWAF?wBGd~PcCq)Xkn7n3i{x4o=|=k`mk^9(4f?vB)VT;I(m)O_7@i~-&?+?K&6yi zw5m z{#@l-v;M6>Tsk~h!flHHZ^##Vm2|{df zax3bT)ur(vaJ8QB#HxS1635?o_;OwNRJ-f54_<1_rQ%8A-KBEun_NrF_k+Ka)Si|IT+=gi z^(Ahkc1+Ltk<=J$25)U00w4SxZuqZvDw_p01Cvr(1Cb7rmtp-^-t#u5(;SF23cQA2 zkLnGRROqeWK0^p=Y56yVC)4`xjTR{VaO1Hz!&?;^QUl1bx+YEvL+fvjjaan6v~H`Z zQa5NyBKu|C_XWEYR{sx<5nPnMqn-&182A+NRfkeB|D<9_ADe*jKcx2B(2t$@_@jyd z=4=Pn#~bT#_^k8G2P0bCmC4!0@&Gz?@RSexki$DPGCa9R6yx2jqPch#-bXnqgjAl`lymZX@rXNaH{tW9da z(|_&fhy{DrE4MDv@%X)9kT#m}bcYWCd+kgbOl*t0COiZx)9tju)LqaBI`K+kEy&RO zxYBmQdYvN2cx#!tX0+XZd{~`w=Cc2;vxGHd4mqSrJGjhrM`94LGBw*A{wMt=3qnq& zC_&JLR>g|ii}X(w7CF7)s(o(-&BPA12bKK;)v#jCMi0na#XqXX`5W~QQO_5{3rqVS ztgLSDu{ka{E4=DIE*45E2h`szsdIr-D-H4;iSK?qAQ-+iR6J)589CpULGt!xYFaFV z-u!s_pt?l8agrvP`ygat=6PjXQTU)K0YTabXV;`oR4JG5Zi2Cn2goj-tFBI43Yt9U zJe*VN-gJJqeA2+*{FR?DvxQ#fj7+cP>zUREXzK+D7jDl#rU9w7Vf~^jAr5ER6f<~i zCq=0u*`65b1Z&f71wP;4^$XW{?W`TDUkohOSd#Ms$7$5{z2Xm% zlfc5st$z{dmq7!5T^5W-7QDH*Qn`~`7*kvShv|P!?2=mRvfw2N6!Glb2hlJn~APnvSf&{n0bqEmLg1bwA3>qK=eQ~!J3C`fd;O?$- z`F?EGy}PxwRr{m6pE}*u=TvvsseZbnG}RRd@m}Mhpr8;cDS|$|w6`xM2nXw>W-TZx zMM0r!Q3A>6crPBpM(jzoP$JG;Y}^mbvWvtNi~8dt$+S+%jrNs|?kPS5HR&4N;e1@V z$6GH!$R{$gD>}^z{Z#80Xt(0`z*ymg01m@Xuy-(@b5C5M#akqa5Y~S6H`Gnc=owas zWCEk)U=@DXp)QvH2ZF`>zmNI9AV1!cv)wYS1@*VH)x%6`8&--sZzE-Phz9wpJDzga zHU)zv)nhbgIL-}*Yll^xf}MBhe_wv5Jd%n@PnUK5-n9ZJf4$+kz996}ket@&iADUG z-QFo)10jq2z;NGfC-{2n@MH(XY?KqI3Tyj&iNUS{J);;pmArCKu{lZ%U9|o_dq9?E z5UKslx+yz6$!d3&_4P7I`Um6aJ9}>Ti6&*T{xFg2m;egMxtGAT-Y>|GlGx53e00~S 
z-1ThaeLQ14EBEG~riO<^$H(t>dry9Dn@uSq3{hO28ni6AWOjxujEtJ6b{ia;SS8GO zr0ae;^^fZ5&J#{oL)qf>4w5#k6*JL40wwS;XWc>XE8poo4TQsZ4LqcUF48Qpj;GtKIOd8K&unN_4G=!6z;JOP6Gdi-fu4Di*Pq7SXol!Xe5IO zE2E;ELd?um%`}S~fi6%PpHxtne)og)R)Wt?I66f(ml1|%yrWA@q7zAuX{2NHJ3+iT zCcbN>Jk9J1hg<;K&+{fY>O#I0iJY%MI;NX!&rlG5Jnof2a(@L>-l@R3D@ z1g*4Tn`iIZX^b@Bf#k^1B3-Y{I4~VHt>JciFiz0eu_=?L+uiwhHlw-Y_7#3yg@A+8 zt0n2TxqvPzbeshAHC{b(BeL0K&I8RWubM@sWCA8k`PQ&Qifo`}Gwp12O#wN)*tSEX zvUR3Dlu%SYf=cb|IMdM;WI!=(da1%>f_ftAWHXs~d?k{Ae9|tO?5d|kNn~eqZd5#3 z-h@)u7-h9zcXD|b$3Hz&c(u^jo*LQjdMI3_(;A3?=h27eu_{r@BM>rz{^% zLc|N4TbPp>)RQ>P2vR?=ajCPn=U@dGC_Zswu$1vr%wam1H~EGl$?$rHOf$2DdW=tgz}$nZ@ah zJ&Vl4W++hyEk8xbAh~4uITc7IhyMV znSw>%2+Iz-L{k?5HbP2D;)P1|q}@z0dKbs#Z}~gpY2Py$llaw#2$E7ovYJm`-Ba*0 zArl9~{z_&>+WilKl_q7qlFBE|>w3*TOV}g;b=%UE$3xG2^XYkYc^L?ZUr1%yB*G0) zQt#~m;MwKwZ)LE)dGil#wL9T)G>g#!CQBsj4ZUT!M8f2^*stL4B4Z!B_}G>3W37wF zQlYIU1}5?1?tJ#TH$p53!LxZe@EZhxi*CPNGhw6fy5;Z5_SKiwks%OgRiH2)R6q$F z-6R=z!J(_mS4V>SqYr`SctQdv@QG@ix4_l&lnJ&Cj~o=0Gbq`$jkWQ`qbp`WP^MZv zZqhw=$*2!=uRlcSOXl6g8!pz7=HeBeqr7nS(cz)WngNPdM9}4OB9pH*gLpgzWZp3suQ!$oX!`6(>H1$dUJcop>2`@v zN5gwi+ROzzcLIZM6k%4y@Q}iGb9`_J#a}mYu}5PvZa}~s_<+|R@5<1R3`EAMtmTHU zfW1?mszF~K2&otNyDrri9>$^ueeD`2W|8-o#l-xD((5XJ!$ zrZ++evk;fzdhl%=NyY% zq=~Afb~G^?q0NJSil3Xq^t?AkbBqIfuY0O zxjyt@9@uB^=|KXbV7m44khVFtl&~^wXe!efS@>QO9~}y#PL1M6B{Clnu6M*AWfHX+ zDusRnoZE24fhu%|GdL4t<+9T1#jW}0bL?a3Y#xwV@o`(Br1(%)a?XdsnMiRz$lMN0 z;^%hzMgs7TC!Xl$Js#XwRrk~O$ow{Y zz#E@=F+$dR=K`{aw&h{x?O$K4ywzn8%|N-9Iy8K@s7jqCw|yTl}~uatO^I zU*5@3n#QEX_lD3G`NBhz>sgs5N&~wk4%}*rkad3Q8k#Tq};E!T|qCPQ&5h zcWh6U+6lrq1KBIK%rzc@>>)%(6aG*UF@FF1x8G^DsVqn&t%soLse61b9~>eqTGC2f zcS$vgP+&fGtF@A9`G(Qy#};slllc^RT~;6J9RIVHf>Zg-#9TSFkzTXNO{t;=Qb}Q3 zqynm$L8UzN=A)tvEdsfrzc^FC1$Jj>c4qk;n9Z?G;OF~7pXWU?)9p&34)waWok5h_ z!i4tG3m28e_^9mnu{}ABft%LEMQu;X2|uozkdp_2?axM*{zhKMOiz@e+HJqe?iJ2T zq>F!x^_5Y$EwVZ^C{2+SGbK_*{*uy--Obm*00m)1CDw;Ks}(NQ?QRRaYLpfbiSZ&w zNK12sM}`IE^F1zS)U38A^)GpyKT@uK=ZgR6!e{RoGRxcX&U)2o!1b3^LtI9`)dIiG 
zI1*zk@pH^6R2~J7<$>79`a4)0?h$!;D#(7t#-=}q-ZxGc4ZNnky{Ll z*r`jTkdqup-^m9)wcrgX)LiW5$QO1XpF7yq+u#xZ(35sB_n7m|AK4D>?91BLdc$F2 zLDD7eO786;ky-V8S<#Ryfunx%?rJsP#@U$?Yw17edd-)i#VH1_;L zj&2HsD$cDa-)fBSVWZZ8Fu-X(c0ac6Y1{4}6VkI?bW86Yku0^0(ovw388NWScf4X! z(!$QAIa+I>{S>FvefE3fDc-D$W%JQgLS1@c=+o83u~yr^lQBK_RF+*7=!dS{Cw;Yn7S@^`aGYI9m6l?sH}9M})D%8`z!W)^)-!TbkF>K|k#=y`jTOLvwfPnJ*9e?W1~q9q~b`RvR*g z8%{d&2CPZ{28IrKmBL533#6Nn%+@APSNzg<9hJjF5~rSBAgEj#>Fpok#>< z&p0^!228TkF)$VgLT93Xjc&Bd<`(msYI&VlL^@@YoO=8iPbgT=OevdNSo)(f?;n`K z%KTKVzRM>LL9?X!l>8MNC=8fHpkM%`F%nQhZi3!>f04EhM%$tr_^kg0;ZHR= z?z2v~8yTu`mAMQnM{k+hR*|^z>-89diJt%5ZUzsE_}ONQXd6MCdbUR_7gP3&C^g`x z!4w+HX&$S{*VPws9F(hx>$L2@6U#8)_Pl$lnx>z2Y2FPrYCRKznH)%sHs(Jw7M5Tz za@SxLcE=#NcSRWO*!lVN@#UNks^F%F42F?8n4Z|!r$IN7pCaRv%7V+t{*Z#~+FsS` z^^I-l$TUCF{v-JPhK2+n{jqCY4o;rh90DEpxY03kTVm!C^vqz7_P!r_cl}o|YnjSz zx8qtl|tt8iQMO^m|CUSL;gaEdy>c@C_p?7=yZ?swHaKFvod$^}YfXLV*)WMfd#ZkuK{_?T;Z z`kj+=PnG@q2)U0ic>a&RNEv{~Q}p1#{;R-IP34EAc`8bf^uN*=e;6a*Lu)l`^1}V% zYdfV=8={hJUhHa!`nA(qwy;m$t<2CdTbPO$y?OCoR&ukXn%H#d@gnc|*%Plw#Or)e zau{ieu!HlnSq#wM1EtH%xd`=-WB{T{4DrWGb&j)2c-C+T!{H8=f-LT6bN|O%%bTun zI;@Et(4Y-vmQ%ujR_g|TO0U5U7a4y!a?mqXC{VZ602WZIiAS|Nk)X?IOYu^y~C6-#&)Py%hq0aM$e>IDylTfCu_ zM8m!oOW($i?j`{%ccBoqp*wWzMyBfM=C4&pG_cY_M8Q7T$_z@%GxpUN!`AdZwjo=^ z7$`eo{vL$2qi&GS;#SRsTlrZ zdoLkTvBtg*7o>{3oxZ0}z*@H;ar<)9mSmgrNUOm}UO(II1sjqJoD=!6pSlcVhfFan znnrO7G)1(pzh_A%1k0qD(Yqp)WD5zvJw4QghM36k++_;Hy)ZG{V{k)9jAmac!xQ}@fZlTP~ zlI&ZPr8e}nncHIDTJY>eiy$tc{jgl{Z+LAa6_H@cD>2f%8YLXFTs-0juX_?*R$H3#CSFD?BNU81i}g;=q{h$PI)jzGbd-pM=0ST0pz>VCWjLdc$ z8}IN|<;5?i@!7^uUZ;K+k>$|EHid*}@cT)+i|5vPpC~S3fw{$^qNXrdf_}f;bTBgm zR<3;IQfj<&E8i=nS}`DYIJ1_+r5?c(j&32t{8k5Fm)FsbcO|?Bhc3Upiy*Q({06=# z_SkW`za!FkHq(#rb1%2DVSRZkELee57BSgtc_a(Qx~iHkg+6rrO}O?;@ra6)XCa($ zq9qj(?rNoF{%C}o{01t+I1!>q=N|d0?Y(EU=reUGIb8P*0BS)-~Qs;Y9biLG7A6+3RsZs(jt6rgr`&z12lT zt*{lX-#^T87KU|Bm$y`02C_Uk@a{56pWTSI6Xnp)fIkBrTR@M&4O4^>lX5U6sF9?E zgu-q16`pYf84E%p5{s_Ws=IkgYX@I~j 
zn}P8^i?wizT2`JRH2pb6gmtlrdhTRaW-t}So9{|4g$BMPvA4>SReiNjrYM?KEQ>|Q zWw-<%E+ee4&NKBc3s~so5@gTV@8upK&-KLs$gIDE_+nS(vH>L&k~v<{X_HDn32Z_! z+R*-V)0}=(^J%-`Gv=?+#T_$KlMx)H=GM);W{z8MP%=EwQjT~)>`#nItqZypViT>uT-X#ZerVcoJvcCHZFI}6|sJ* zrXz*e$D4U8funbCoMR^q19A7})!P=8bOnZ5RWjQmgwd--=h2po^1Aza!8eg_eEED~3x(%S*i*1Lh!wu&coj6T+ zz-xEd%DlcvDKkQ*MYYbe8B>zm`IGgtTp&iVz|d%a{U2|>yXRK@Y(Be1)CJch_K^vt z=x7VgcTfRFG?Cn2r?460)+#^6w2V}ra5o;#_9UzOE6CTjq=EY*Zw!Vu-K(~rywBu^ z>k@EZ{Vyx+8emZ1J)D-=W@Gi98@wd@(9yd*HJz(!a&@~en_pThxpu6-{`yPX7?IkU^J4?4h;cq0`mnPc5={7i=j#x-X)+Z6 zk`zD;XfVUv8Xei6v?ZziHMMGfwOQW#oUH$}hhGm=XlY@QpanbwE@?VHUL7qZ+R)58 zo*S0qjmsi^yNVlEhBrMsyByR=)CV8U;#cqBeD0H*vyM4!-|wDj$H$Q5q4rUA-4}Cg zuNcIjq8`*r;P(ptIa83g{dM#9Kp4R!9NLa8p^RZE2^Zl+$5JD=Zh~M@w}x14c*Y(z zWjQ)x6$}0+8_pyB@@{JJ zR){;=B-&!CX0HV?a4LBdD2r~Ab1QDDFc6pm^uNQe zAJ|@Scgo2+;!t)|?#%-u6S%G=jg!6B?oID?h1#A>T}zy@S_n>_&Up3RI#i&szhL)? zEr4V`;xpD5u0$mo@>N&)l>rq+UpUt#P?8 zN#&V36xI+M<|T2t&ZhrbqAwh=ewA-AGP6qf3m+Bu1NLa_nE8M<^w_((g0^8{#2>sH z>HRI;{$NS8?bwox|L+3AXzaMRbMWH{z4H6hO)91ZJ9W7-dd|G?}06?UwqM-NqzI=QlKsb*h z$A_{?0Du{+svvI=xUfGf>8@`}KYo1Zr==CFY97K8yWy#$K&k8F=R?WYDX~-1Am?*b z58Y*34yCapAl9SfZ2SJKZ?wG~m5E|Q(?{15Ghlko9OA-HTBNM+HMF+6{60#8S*qP> zIvv(B2`w@s)dhW2ad^KBumf}d9~PQUw*OioO!xbBkK`e6yFAAJl+ ztsE<2^&8Q%Nj|(tgeYE6R#}r`cjIx+XPC@LT zcmI@N2SR!aDCwjctT&e@eF8kt<4ep<&l6C}dgs7xnjB(h9Zf3*;Jg%afqFcw$){4nATPZm~6+%78f z{p)Mcgn5$q&&!NqS?HJ)6yd^+sIb+=U2*o^1oj+HvXX+HzGU8A+1KM<=!WhRvtK zK67jLPr7%kqW_|0?cmomajwrf$pGgwo|z?JC`-3YB8~TkL#m+Ar4&nGd}?gI*X9Y8 zsyBf-VO7G6Ns(?a%Z@LoP;MaTCsW9eXFagg2_))gB_hztEI5 zrj=iDOv`8HVcINemaLd~BRIV7@%rH9f%IgHu*?wvNIaUcvo4~FAUTr5R6W8L=)aM= zczusX&?(*VDipT|fMwT&?!H3;MR=_MTx3xqvOhTvj(Ej2(6POYRIWu<*jJ=y2L*sQN#o|G;AyoDV;_%Wmn<|h&d&l zfH1yNX#=tkx|TtFYGP1aoz%rrh)E!PnAAL+hDF+f-tv_ON#6YdxU;O4=#OE*5gXRr z=aT2#Ge)_JO~f?qefHkJbH3b|X!r5UAWAf^s$evRE0H+`-xC20K;o&_Sj{Qah%u$) zh+*+$d^JwnY1KDbi~u#aBg{+_`yUv3S0I*?h*c=1-F~VE*xK{Ns;E3msO-Iv#a9l? 
z^xUOEW8M}ImuaeU7UB-_^WyMBqapM%F8uv3y9&Qo)QaXxAdFZpjmBKg=XNvH9!dCY zpOQ%L^z-h65N#&48Sq#;mVt7XF^~`Ou3p(81td+8`N?{eO8^ z=i^;@{CDb%)8!Ut457ee(LKV91B_dPik~aZCt&IZ4`=C0^C_W4X_@R2B8B$ef6$t4 z+`nTgths-OkTkagYxAZa!YNwUJAr`aH%yee6=^~^Xf`8vsa+j7UVWC-n%L8(Jywvr zk6A+(hs?={wXZt^><3RnWUy;?jtWGl7}#3A!)NUICFuNR0Z47 zykd-|us8eGEy-z~C)BMyF_lb_v#(z59~P0!gochNRB5+`i=C7COl;n z7~%k}uM=|8ZGhQS62IYEdTH1#hWrX80b{oBU%U&t>@fZI4vU2^OhN1mz~&LCdxZ7( zJPT}ulc^btona@@br6l5pq)8qj5xvR?K*Z%yAq$o`x$}EBYIIwjzrU9-P^{C1PKJx^uxm?<6xLYi&3I>kBCGS?QY`S}HRmwn@M3^ig+{s@xZ?Qn zk$q!u_BmJG@LN?3V604+dhw0|tA+ri(VtA*V`6{QOZ~QR%-=fhoJ1hn5y1nbs??`v z+|!+LR=?Z!;8@DSpxgT1-aop%z=#rBhQAyBv1*e-^lYK-fP#++f(T2xzf#_L zf5X`K&KRUEt#mnIY`eageP*A%bVAt|6uN3#Lb~i~yGdQlHu05ZD3@#*+_)?RbtfbD z!FkN)qb8?_7q;)e0^xmr5P0&by7z$8$rKk)SZd|{o-*|6iiBDD(1OO?vL$&+>Y7V1 z0D!Xim%wZD;y)*Io9cZd(pD~1WkRRIC!L05&)15M#Ln-Zsa@W(O~9jZy{~WT#>FRd zt*?uqJhdJg)7!!ly%EIPn?!KAbSX{U_iT+*unYIjds~eMEF+S1c^boyYrp6M-#$Y@ zGoeb#c%p5T>Re|NPupg&)$Xygn?X`OR8mZ=NqNu_Rv0L@g@fesOhfjjfh-pD9@!^+ z?zQpB+RJCXOF8H&V|dP^CM8+PILyf!8kMq0`W{kB=?>yoC2Sm2Tr|_;TnSn1cW9Lw z#kYPEXmi+b^hr`CPR^%%1%49Gc_o56*+2dDZnyWPp%=rGneh>Z86-9}JtU#1m{Uvd zREv3Yob1?EMZ0uUG?3k(I?md=WWt{wGs|U$bF3nN;I+Xs;Il2!w;Py+U>1jn0hV&P zKn&3Xmal;F+tAFv!Ls>T#OdFBjV`eVEf1?-9(3BnmDX`sYM)F*Z5xg5vp!&hHM24@ zlkxCGH36cZT+AiGh0DKvxjM8yy%qBc5i@3%Jm~k`>oJ*P{`>EneUVcN5zhj`zw8bK zsjY%x8{jnv!>UEqs?CP^sG1f^C~yNoh6+z6Rkk8^Ogw$Yn4<)11mkKnKDcFl{(v)s z%bgBmeLFmo+*zhbbs)9&i{$k-3IA&6`_ZXG^r`boqAvzaO+m+mV#EB9?|3Geb%el^ z_dE_aDF)IXXBkMSTW#GON{*>P3;eGtEi+t4_Vp(7X$d*AywPQOJr|Q6|kRJ?6O{&+x1am)a>{43R>Rtq6JFEfM-i(!TAyswQ6?9%; z5gkoJ*MXRIWBSCq-JK4Mk;Aqp5!NJ2yQ?o`?` zkGMT$X(-sK2NHO_lxt#dpWM##vzb4sG_n|E6s<}|UPi9MKJBZpaA;Rb`r|Mnl}u~% zE1+&{l!b%1NtKCC`7wkPAnv>%EjtC~+ozi+b1oPf`M9^iHtR%d8%qV;D zWkXIHni$a*qD}2csoOY8PeD#+Nw1PxHef6xk3EjMsfj^vK!os!xY+L7QjqRDupg^w((wsF!bbwnLD|1Y~40xcj?o*cd<=bw?&f_yM#Dh zyXP*CCc|Sp4&btNeD15YvAU>}+x-slmfInn!s&`AK~ybi#gpsz*)9i!yBl~j=Y+cM zh4QtU4GhyVvf9v))Yg^=A%ThivVRv>grRa=B(v>k-2u=P3Y!Z 
zS(}tvlUA<&h+>U?^t5=tN{CbH$}Q&lOR>d1|FYe-r)GYZILjEA@y0x4Kb4NS4GoGD zvdLMVz_`jTO1D^IZjbp>0Q8sy$2_74jzrj-)JMFY-2SPx`mq$o|3Q>P{PSWt0$P2AWO{9yFHpy6z`kym z`(!xJCB*lf4EZ|s*iGum(LkZ~x?r=pe7Qj#I)kF9l8&!=%#ddx3YGyQO;=!F@QWL5 zdEakk*?j^mDI9ZE7K}KLDqfs{)^ZxSQz{iu(w1Jnm8dZp%8Yp>}i3PY>ZQQl+SD3FnWkAUUo6|FN{O z^~>#PZ9j3`j!)6Y6U4k*LvQs6>!PS-H#I3<=($$h&(IXj&V63Fw~NUK7k!VSf?K%kwZc#xAo~q@)de zyAQ;e`etv&_hHx+(Gu}UW9!LTJ8U=e27cWhnNQ})e|3v2Y7Tm~@j2PQFxc)*xzW4F zYIkyOUF)~Em^7dFr#6G!3$D*{1IUy97_?6yJ;bhVpJlf`FH37Qog@C~QW%m8AE?cM z9iJE6V}MT#GhQFu@HdPar-ou~>iy3su5}009VcdW)k^It-7w%?a>aUOGDa0$T_Y6? z*w6gaI5kWPEld0Z;*M@vyl1q_lUokc^KQQJ{>hHJlV1W~Gb7|+hqEUqGIC5E<3QTbNVs!Ae z;T*g+?O)(GC5|WFcU#ZeDmI$<&J%)j<|{u*)s=WEwkKPAVeSo7+{`UN8@KVQ&o-cY zLAyh*#hWg_YHvA$OihjIiq$A6=?a>cgB|oUt^*VQsbuKYtLXfz7L%scY|iQRmvOib z%>ALqBK4xXGy0R^xXlTuo{!t>`A3==vRi(4FkMUy;JxadB)=X?kIQL_#-^}Zb z|F505sc1+gsM`fF@w^-_xNfBa=4i#wZ7F&<(;&(X1T{Qq05&O`v(SM!>1L9Iaz%td z-4Q1A5=nk@_?$d8^{N#8eLc3A(v!PlQ5hZ&f%j(_| zMQt^mgN@jrmiI@zT();8`Mgs>KAS;a?v??YuADidUMxETs?Vi%`2-9kf?uRQG>$j* zTk-W%|C=GO)MD~(;qM!;36ha)HeUu7JOyw8d!w|c-PbRQMq90)5`&v09!ckO^4YL8 zETlxfSc=cDWp#R4qtYM5CiMcD0RFp|!C&@1wf`DKNXEXCj{n=L0tSCaYuT2%X!>4M z6!8I5(cw4fc%#<(OH|WCtbd$wM%b?v$pBEq>%)ifz!_)hRq-qAg=ut)7{NH0-;XmklEm_zF{h3b+nd%kTXh3KP8X7(MIpFw?;=<=-q`*YGHoS0LD=b4K&Z0H(LyWN-a}<}( zCX|~@5YQjPiOLvNI=_)G*K#b&{CmV&AoYhe>ZK~_Wz+zL3QHm^PqM-Fo1#J=DMS34 z-av$(jR%zvwaupKwQT|00s1kJ#AC7bOb|e%D;Cx=#(-;Xs=^fJLZK}qF!r@e5SQe6 z)1h2$?kCB$CRqlbOFDt6Fy((kErwR1@R;E#;h@WcECkfdar|TYo0j>H@M9BqzvPYc zj@-a2Zfo1CQ9iUDBeeKFKDV9w>%X_S0Zl z)H9JIZq1}Kj@c%%eDgmV9u0zREnnN0*#lM#9L86kE(TCli%7))oC%=>M$tlRdS#z6 zwIo2gyf|!1l|cgyiguHHuo_>~pxWngNXsnZyT>xB(gIb-`BqqLc?lVC)wA0CxzGE( zGS~PB@t46Ox-Y4#&PWvWcAwg}j(M{&8En#*MIax*bQPOhIowO=R!D`RU*5K6jyuA6I~;|QOTWMF;@^FWXE-(Wd^Isl zs~TP#!RnI3StvMalV?)xud7Pt`9w0TU_OX-yw@R4N`sIU)|K`HFBFholBy-}e5Mb<7>5ceiG17~?n!*q8rF8?k2B_ffeiQ~D_KiM*;0TCzEkn#0>u=Y5U- 
zieQ{uYKvJL7e)eA8cF>lwl8;OE5RLseG=1{X@$4;D1qA&O|_U)P>tl#mT{FnE@VmX|(x6D<{QO^Xc9MrEIn@Io-2?iK2l}4OO3J>xU+^|fm1UZV{O%U$``4V6~=eC;(OolDs)Y_^%YM2{3ZiyjI$>7=qVrg{ha8wX?p+A z^qY8`@N~P89gft?jkLhNPxljt8TkqRgncf+jM(W~FAnM+qSAJOoshCS(pp>_`$oJ@ zN(43=ED&UB(uVmcXw6Jf!PXF?f2gwv`E^xj6=T9)p}KkJ+f2{!ubgpu?cB}4zm=gj0!gFX$*j600U+BDoXXbOgHV-_j0gX za35B`uzYd%6ZAjRQ}@ETD_aHrRHBeNDeY+xGMZ#w#p3k5cQnbdl}Y?SKjMl{aDTC?dyPi+Gt>+uD%UKkP-Y_57zC_l87r0}_75lQ^CP1M`d>hr)Cr(ylQa-!uczd3 zQWAF?wBGd~PcCq)Xkn7n3i{x4o=|=k`mk^9(4f?vB)VT;I(m)O_7@i~-&?+?K&6yi zw5m z{#@l-v;M6>Tsk~h!flHHZ^##Vm2|{df zax3bT)ur(vaJ8QB#HxS1635?o_;OwNRJ-f54_<1_rQ%8A-KBEun_NrF_k+Ka)Si|IT+=gi z^(Ahkc1+Ltk<=J$25)U00w4SxZuqZvDw_p01Cvr(1Cb7rmtp-^-t#u5(;SF23cQA2 zkLnGRROqeWK0^p=Y56yVC)4`xjTR{VaO1Hz!&?;^QUl1bx+YEvL+fvjjaan6v~H`Z zQa5NyBKu|C_XWEYR{sx<5nPnMqn-&182A+NRfkeB|D<9_ADe*jKcx2B(2t$@_@jyd z=4=Pn#~bT#_^k8G2P0bCmC4!0@&Gz?@RSexki$DPGCa9R6yx2jqPch#-bXnqgjAl`lymZX@rXNaH{tW9da z(|_&fhy{DrE4MDv@%X)9kT#m}bcYWCd+kgbOl*t0COiZx)9tju)LqaBI`K+kEy&RO zxYBmQdYvN2cx#!tX0+XZd{~`w=Cc2;vxGHd4mqSrJGjhrM`94LGBw*A{wMt=3qnq& zC_&JLR>g|ii}X(w7CF7)s(o(-&BPA12bKK;)v#jCMi0na#XqXX`5W~QQO_5{3rqVS ztgLSDu{ka{E4=DIE*45E2h`szsdIr-D-H4;iSK?qAQ-+iR6J)589CpULGt!xYFaFV z-u!s_pt?l8agrvP`ygat=6PjXQTU)K0YTabXV;`oR4JG5Zi2Cn2goj-tFBI43Yt9U zJe*VN-gJJqeA2+*{FR?DvxQ#fj7+cP>zUREXzK+D7jDl#rU9w7Vf~^jAr5ER6f<~i zCq=0u*`65b1Z&f71wP;4^$XW{?W`TDUkohOSd#Ms$7$5{z2Xm% zlfc5st$z{dmq7!5T^5W-7QDH*Qn`~`7*kvShv|P!?2=mRvfw2NO)OK0^yduPNUvyKkgBYP#Q-d9Ld>%YeR1rB8SyObKJCjh{X zGC}IuM$FTwK2Mz19m&7WPfLcK!m(lD2jAX=M-%S$=TY8s8icM&XII%;f-clMNlKa~ zr6k&x%f1+Z0UO~!F;Iom1t3}q#f@x=49Z@*{s!9d%OGs#*Wy;^*vQA|ZzF9ay9PA77q(o(*JSaEtRW8 z$Z}kXY*u@~w_%mXUvlcv#HE`ibJ{A@CBNmi+JW$~4@ozPO!8}z;{8%SVV&5cDH?Qq zv=Vb)D_%W1H#LfD&yMNOpkV%L~T#Uq_fLIkHm}CNC8kcQsy^~{_nz*m`D&$AA z1KV=T->XdkPBQ5~?ZL@puLxx1j5r7JG$8>lWCe&k;dn8d{tCHB*AUMI zkoO>z0?vj+F(p9CsN?Q_UD(F-yygi%SHLmt;3@b`rq=6C*d{U(2OCXhi%WHlbM9h+ zFDX{UqctNw;U}^L`c)dXxjsun0exzjAE-i9Yr2>?|zPDyhdKEV{09S$P7zr@MZh 
znKU12HykQBbqVEeY#!>*P*=#0CG7s*l9hS%uYC5Zi_Cw<^`{AyKG4_L7{CO#rI}C| z=d^sKN7sWbdec{?%+*r|xklc&-GJhCh5cWrF^~46I~=wtgn;0fZCQo#H#Pr|ko(_5 zc$BY5h0}fb)nfJJBewn8v~(9=8I2*sE}YfgqwAs0fy&;a*kbBd zvmO|bF);plDm0YQ)iHp@{Nje-l`^ zu6)L>ux95Eq0;RuMgx`>k3ie|v3)m#k!p-yj;(K`-!5kmNT$6ddi*z=Jj7@*_rh7Y zc9dgt%5)xm6w$+8d!h|?-$p+rdohf|yemv)xZC?6vjkV^*_Y^aQro`^hS|<@`^?t- zq^$B10=@SvmVAvSbbYMk3}njMkZoV0|6Ls0Ct0wgIx5|}p+}M9P)MF+F_c8%0{Jy_ z41jpMo}z`ejjpAhy^f4eu^)cNY8iXI9m!m73<}Qs?#UTuoAJ~_;*&O2TZovMVW#>sT{ClzyJGlR}hjPO-ci)A;0z zXE`Gij?ZQ`<3%#Ky(4~$%AM$F@B#XShF4RAO6+)T(Lx4C@7ykSTB5R>e-sIQ{*}iJ zS(S%!8tLo}UU+oSOcF(&VVGF`dJqk_k8RnxZy_055#GPVZ2821Ds zu;G5HulhDxeQ4@J-j;_Gj;rc&Efl)<#iPwHK?Hm3p!;&Oh4So7bhuOFSl9!bHqK@f zWs4nhiuNm^j%pyxYMR<96Ow~Ztt#qt8XOok7cC@jBtE{EF-Z)Q#Zt8*yxn9=MdksFKr#~+H-JbmB_G;@mgaZSA8Hj>-`>XozPutV`Y;uWN1RDawJL(e_Ir|4Q(oU=CVCfujq8NoZggm zeO)KY9~_^-Sz`{FSqZKM4c*Rx zG}Fu?7^KhbiW=0s2BV-7f=elq*?p$rJ)w9Vh^-wrRU-{n#8wT`&0lL-PpB_?5pUrvJF{+R+t z>13|yvqsLfD#f`9(kKzU-Z${v0!ZV^sQA<$`Wv|k8I10u8Y@1mdV={iZl109Txm@a zGvX=Ki$oEHVgF;EXB3^0VAIM&$*{KdV;S^N>qm-zuec-7=%WTt5ML+2*& z76D9P2Yd+cf5>lk$pg2Nu*INpWh-h{XXuW3p7L%ZzIzSBJxRX(v&`x!K(RiaH^=1z zIGM8-$!z|br4jx>iG+R{+?95#z0^1u2wuFO%)T*~FhdpRU~>0NY~Pkq6I=}ohPjNs zOJ^ECev5H!YrnIysTLdG?6h6oH{U56B{FJVOm;*@{j*b4rv3V&r9AmixNuEtI!PGy zvS{RJ%+(-kF}&8?KNf_(I?N`jK{bjTUn-s&P8nD_bEs2&nAU%?S@81}vhJb2N7wE5 zgF(%XuXyUTOC@Gcz^Pfc(C@wICyeuQ6^nu~L*3%E+Tg_$Z;CuPUC?EaJb6GLa+&fQ|6pG&?P0Sf6J8!>P5$zgO@ z8qL5Fz+y2?Qr=YIsmcft0R&e>C1g=RTvuF|+nlSc2D9#pRMuKBmhsy1@vdDc?RAP* z>iz|jglUbK8jPr~S+fu4($$Z;yq$wOSoI631#o_8tm1p}N255S2QaX1U3eqt`YO@c zutYaes%XIY3A(3eAZ@uJ`U!RZi&O@i-In2YX}*$9YO^3+-2tn-#j4Y9d6Grq!|$xM oFPF0Yzk?K%@ThmGJiPgwNg{Cut;j5d{~Oo<69W{o3E>j|Kb#?n>;M1& literal 0 HcmV?d00001 diff --git a/parley_tests/snapshots/draw_colr_emoji_with_non_printing_variation_selector_16_without_default_font-hint_skew.png b/parley_tests/snapshots/draw_colr_emoji_with_non_printing_variation_selector_16_without_default_font-hint_skew.png new file mode 100644 index 
0000000000000000000000000000000000000000..9ba1318eff76b91270fd50700aec7f4ba5ed6b84 GIT binary patch literal 3035 zcmZ{mc{me}AIB$QjvP_x*d#)3l^mO?se~L^p@cGLn4u(d<;QHKkJuH}x!Mkb%4Sv@Q-Rqif6l4+d) zmng5RbEa#Te9-^!KrO+vUAaWbv^6W*K1M!PULG6$(o0g%TOIiG^f8kgff?qTu&G~E zoO?7*dvWDD7vICgZuS0wtwL6Bu0Jc9-U%&YPYT!5ulVhupKmKF(S*+4FOF8pM8_Pi zv}!$KN`wpA{#W^*?K7YM`X{9^X8!6Y4!L9OHrl!=qb^JPu6;Y<1$5ya&ceI1LK0#s zZpr~{+4+9W@aGHd8Y!9HVCb>C)H~VBBgZ!E#uGrZrTr_jPDNbhi14yLhSsFvB`fQ& zc0@d(I;1~gV4u9}n4TfoHN5QXeno80Cvuq-TyuVQpj7cA{nggKbr5HM>B_haUn#OY z^<*4F7ZC1wr+?0*pPD9nbStg?u`w480%*We4UyyX!k7sV_ z&U{bKYNhNrFLq+0zW7Lrw`&Elkh%?Y*~m`9&RS~I{@QbZFDXFAy4F@Z75bkDLCld>^YIKen_N#b);=hR9 z$JV{DK^{^s!{I;hsaiH0rx+eLjBkz`mN5O1M9DP_7qt8iZ^~j*Z#75|s0S{Or?EU> zv?cdy)Z?Wc}7ieBC+)+VZiizyxp zQ088Ns@s1PW+zgTvaLZ4c+UyuLHF(qiDV~^FVH6IgFq>xP5>m8qaC^U(O%HT_Z70uMd1%_5^sh|N0XlXN<|M!CkD>kJ6Esm zcQBhD1SqUk`pi(H^1ww=J?zN~r)kIU$gr@Ju8LFvsfh`c;w1zKHy{64VKRZ>4=!B) zQG3VTkL0XTS@PDS1N^G$kLP<_$BPhJYkY;Pp`)4by=E#>sUI6^Cu^01o>0__5wZEDJpD#Rl=-J(V9}EAL=AthKi)-Ep zOEQ_OcG)}dAsFV<&_7VHW?7rw^9=^A^`turcLaxiabs`O!*(6|FUy5UA#eQJm^6 zGR?cUCM{BRmDZIdd&p}+$vZYH=+mwmca((Xfs3HRXrhhtNzilL3Y*DUp~A1rkNG=M z7ru96)*?cq4icuuyzkE^O0dL`l*TABWf1KxTlkJIHy!mQU5EFP%yYc(t6T1)}2BTr@~3BP!XNr`*Ft2r|PS75RYGoL|=NNULEpaN~9;z%x$2~3%HOJA0#5o zIF+Ah(*k_A(%~3%EPGDS18n?LaKyi(Fl&_PC;jHN^w!-)V`R`)$78a?_3zjb>n*p7 zW%bFI7eD9HR#OFlibRvcteyk%fqB>+K2m@8^s-`OlU`%dqE#do-+u$Ssj?w2RvN$- z@3uWuvJ$1TWPZ}jY2oVo2Ev?j%Srb^2VJ3+mtQGq5FW%{hHme&gl<~^dA2iUjqrPa zPpV@|#nUeI>{&AbL?cSNWyi09LR5^Et1?DXPF$m4+9bd`5s~(7OL9Et5k1oxbM7db zstw{xU}LjwWo>8-sCe3V#pyk?j`2sF-* zw9|V*600iq*%2QLl{QQ!-_hi|#R6~+6xwJdZvHtNR4@#bbo7_0{6qVvvPlFp%SNvf zBTC$>iXrof!z*o1&&-&Rq}cAZvQq?R#jVxVOt4 z<)d&0zie(k(TjL-nr2=NmrD793)a@IaepYlAlz;vBP$qlopPEqj0#6nHC+E}zL_Kr4d0eL=wdJfOFqnG%7IXLk>5|j) zP8o%ccT*=~JN%U=W)z;;vjqx7>3Xh^khzLaG9qX6|Cv!>jG7mi7Dn0V3wFC3eGe%w zh|`5iE;Av68yRSsf|)smX|`t&i!hiI$ou3+6^%udGb4ITMP{=Qu-g7wxtpO?RadME?#-%3`5c?NlE#*T?)hfUTcVd{Sy&yzs4D4TaSUi;m+F{T_mC@rE52`QzQka6;IO?@mf6&b?!!jgW 
z_k=T2^L^e8^WWpTYLh_OGrPD;ZFtyHTVyw^86zKOJ9$%=wcQe9-TSm%7_;)SHR|)6 zf_Q%HLOpflp7{vvYEtRyz%7pYavZ{Myw?M)1M~}P0?^7QP3h#cTJHu?>LLG literal 0 HcmV?d00001 diff --git a/parley_tests/snapshots/draw_colr_emoji_with_non_printing_variation_selector_16_without_default_font-nohint.png b/parley_tests/snapshots/draw_colr_emoji_with_non_printing_variation_selector_16_without_default_font-nohint.png new file mode 100644 index 0000000000000000000000000000000000000000..bdcdbb76f7c1cf72d4a89a9b67cc5e6dad34a21e GIT binary patch literal 2951 zcma);_dgVlO)OK0^yduPNUvyKkgBYP#Q-d9Ld>%YeR1rB8SyObKJCjh{X zGC}IuM$FTwK2Mz19m&7WPfLcK!m(lD2jAX=M-%S$=TY8s8icM&XII%;f-clMNlKa~ zr6k&x%f1+Z0UO~!F;Iom1t3}q#f@x=49Z@*{s!9d%OGs#*Wy;^*vQA|ZzF9ay9PA77q(o(*JSaEtRW8 z$Z}kXY*u@~w_%mXUvlcv#HE`ibJ{A@CBNmi+JW$~4@ozPO!8}z;{8%SVV&5cDH?Qq zv=Vb)D_%W1H#LfD&yMNOpkV%L~T#Uq_fLIkHm}CNC8kcQsy^~{_nz*m`D&$AA z1KV=T->XdkPBQ5~?ZL@puLxx1j5r7JG$8>lWCe&k;dn8d{tCHB*AUMI zkoO>z0?vj+F(p9CsN?Q_UD(F-yygi%SHLmt;3@b`rq=6C*d{U(2OCXhi%WHlbM9h+ zFDX{UqctNw;U}^L`c)dXxjsun0exzjAE-i9Yr2>?|zPDyhdKEV{09S$P7zr@MZh znKU12HykQBbqVEeY#!>*P*=#0CG7s*l9hS%uYC5Zi_Cw<^`{AyKG4_L7{CO#rI}C| z=d^sKN7sWbdec{?%+*r|xklc&-GJhCh5cWrF^~46I~=wtgn;0fZCQo#H#Pr|ko(_5 zc$BY5h0}fb)nfJJBewn8v~(9=8I2*sE}YfgqwAs0fy&;a*kbBd zvmO|bF);plDm0YQ)iHp@{Nje-l`^ zu6)L>ux95Eq0;RuMgx`>k3ie|v3)m#k!p-yj;(K`-!5kmNT$6ddi*z=Jj7@*_rh7Y zc9dgt%5)xm6w$+8d!h|?-$p+rdohf|yemv)xZC?6vjkV^*_Y^aQro`^hS|<@`^?t- zq^$B10=@SvmVAvSbbYMk3}njMkZoV0|6Ls0Ct0wgIx5|}p+}M9P)MF+F_c8%0{Jy_ z41jpMo}z`ejjpAhy^f4eu^)cNY8iXI9m!m73<}Qs?#UTuoAJ~_;*&O2TZovMVW#>sT{ClzyJGlR}hjPO-ci)A;0z zXE`Gij?ZQ`<3%#Ky(4~$%AM$F@B#XShF4RAO6+)T(Lx4C@7ykSTB5R>e-sIQ{*}iJ zS(S%!8tLo}UU+oSOcF(&VVGF`dJqk_k8RnxZy_055#GPVZ2821Ds zu;G5HulhDxeQ4@J-j;_Gj;rc&Efl)<#iPwHK?Hm3p!;&Oh4So7bhuOFSl9!bHqK@f zWs4nhiuNm^j%pyxYMR<96Ow~Ztt#qt8XOok7cC@jBtE{EF-Z)Q#Zt8*yxn9=MdksFKr#~+H-JbmB_G;@mgaZSA8Hj>-`>XozPutV`Y;uWN1RDawJL(e_Ir|4Q(oU=CVCfujq8NoZggm zeO)KY9~_^-Sz`{FSqZKM4c*Rx zG}Fu?7^KhbiW=0s2BV-7f=elq*?p$rJ)w9Vh^-wrRU-{n#8wT`&0lL-PpB_?5pUrvJF{+R+t 
z>13|yvqsLfD#f`9(kKzU-Z${v0!ZV^sQA<$`Wv|k8I10u8Y@1mdV={iZl109Txm@a zGvX=Ki$oEHVgF;EXB3^0VAIM&$*{KdV;S^N>qm-zuec-7=%WTt5ML+2*& z76D9P2Yd+cf5>lk$pg2Nu*INpWh-h{XXuW3p7L%ZzIzSBJxRX(v&`x!K(RiaH^=1z zIGM8-$!z|br4jx>iG+R{+?95#z0^1u2wuFO%)T*~FhdpRU~>0NY~Pkq6I=}ohPjNs zOJ^ECev5H!YrnIysTLdG?6h6oH{U56B{FJVOm;*@{j*b4rv3V&r9AmixNuEtI!PGy zvS{RJ%+(-kF}&8?KNf_(I?N`jK{bjTUn-s&P8nD_bEs2&nAU%?S@81}vhJb2N7wE5 zgF(%XuXyUTOC@Gcz^Pfc(C@wICyeuQ6^nu~L*3%E+Tg_$Z;CuPUC?EaJb6GLa+&fQ|6pG&?P0Sf6J8!>P5$zgO@ z8qL5Fz+y2?Qr=YIsmcft0R&e>C1g=RTvuF|+nlSc2D9#pRMuKBmhsy1@vdDc?RAP* z>iz|jglUbK8jPr~S+fu4($$Z;yq$wOSoI631#o_8tm1p}N255S2QaX1U3eqt`YO@c zutYaes%XIY3A(3eAZ@uJ`U!RZi&O@i-In2YX}*$9YO^3+-2tn-#j4Y9d6Grq!|$xM oFPF0Yzk?K%@ThmGJiPgwNg{Cut;j5d{~Oo<69W{o3E>j|Kb#?n>;M1& literal 0 HcmV?d00001 diff --git a/parley_tests/snapshots/draw_colr_emoji_with_non_printing_variation_selector_16_without_default_font-nohint_skew.png b/parley_tests/snapshots/draw_colr_emoji_with_non_printing_variation_selector_16_without_default_font-nohint_skew.png new file mode 100644 index 0000000000000000000000000000000000000000..9ba1318eff76b91270fd50700aec7f4ba5ed6b84 GIT binary patch literal 3035 zcmZ{mc{me}AIB$QjvP_x*d#)3l^mO?se~L^p@cGLn4u(d<;QHKkJuH}x!Mkb%4Sv@Q-Rqif6l4+d) zmng5RbEa#Te9-^!KrO+vUAaWbv^6W*K1M!PULG6$(o0g%TOIiG^f8kgff?qTu&G~E zoO?7*dvWDD7vICgZuS0wtwL6Bu0Jc9-U%&YPYT!5ulVhupKmKF(S*+4FOF8pM8_Pi zv}!$KN`wpA{#W^*?K7YM`X{9^X8!6Y4!L9OHrl!=qb^JPu6;Y<1$5ya&ceI1LK0#s zZpr~{+4+9W@aGHd8Y!9HVCb>C)H~VBBgZ!E#uGrZrTr_jPDNbhi14yLhSsFvB`fQ& zc0@d(I;1~gV4u9}n4TfoHN5QXeno80Cvuq-TyuVQpj7cA{nggKbr5HM>B_haUn#OY z^<*4F7ZC1wr+?0*pPD9nbStg?u`w480%*We4UyyX!k7sV_ z&U{bKYNhNrFLq+0zW7Lrw`&Elkh%?Y*~m`9&RS~I{@QbZFDXFAy4F@Z75bkDLCld>^YIKen_N#b);=hR9 z$JV{DK^{^s!{I;hsaiH0rx+eLjBkz`mN5O1M9DP_7qt8iZ^~j*Z#75|s0S{Or?EU> zv?cdy)Z?Wc}7ieBC+)+VZiizyxp zQ088Ns@s1PW+zgTvaLZ4c+UyuLHF(qiDV~^FVH6IgFq>xP5>m8qaC^U(O%HT_Z70uMd1%_5^sh|N0XlXN<|M!CkD>kJ6Esm zcQBhD1SqUk`pi(H^1ww=J?zN~r)kIU$gr@Ju8LFvsfh`c;w1zKHy{64VKRZ>4=!B) 
zQG3VTkL0XTS@PDS1N^G$kLP<_$BPhJYkY;Pp`)4by=E#>sUI6^Cu^01o>0__5wZEDJpD#Rl=-J(V9}EAL=AthKi)-Ep zOEQ_OcG)}dAsFV<&_7VHW?7rw^9=^A^`turcLaxiabs`O!*(6|FUy5UA#eQJm^6 zGR?cUCM{BRmDZIdd&p}+$vZYH=+mwmca((Xfs3HRXrhhtNzilL3Y*DUp~A1rkNG=M z7ru96)*?cq4icuuyzkE^O0dL`l*TABWf1KxTlkJIHy!mQU5EFP%yYc(t6T1)}2BTr@~3BP!XNr`*Ft2r|PS75RYGoL|=NNULEpaN~9;z%x$2~3%HOJA0#5o zIF+Ah(*k_A(%~3%EPGDS18n?LaKyi(Fl&_PC;jHN^w!-)V`R`)$78a?_3zjb>n*p7 zW%bFI7eD9HR#OFlibRvcteyk%fqB>+K2m@8^s-`OlU`%dqE#do-+u$Ssj?w2RvN$- z@3uWuvJ$1TWPZ}jY2oVo2Ev?j%Srb^2VJ3+mtQGq5FW%{hHme&gl<~^dA2iUjqrPa zPpV@|#nUeI>{&AbL?cSNWyi09LR5^Et1?DXPF$m4+9bd`5s~(7OL9Et5k1oxbM7db zstw{xU}LjwWo>8-sCe3V#pyk?j`2sF-* zw9|V*600iq*%2QLl{QQ!-_hi|#R6~+6xwJdZvHtNR4@#bbo7_0{6qVvvPlFp%SNvf zBTC$>iXrof!z*o1&&-&Rq}cAZvQq?R#jVxVOt4 z<)d&0zie(k(TjL-nr2=NmrD793)a@IaepYlAlz;vBP$qlopPEqj0#6nHC+E}zL_Kr4d0eL=wdJfOFqnG%7IXLk>5|j) zP8o%ccT*=~JN%U=W)z;;vjqx7>3Xh^khzLaG9qX6|Cv!>jG7mi7Dn0V3wFC3eGe%w zh|`5iE;Av68yRSsf|)smX|`t&i!hiI$ou3+6^%udGb4ITMP{=Qu-g7wxtpO?RadME?#-%3`5c?NlE#*T?)hfUTcVd{Sy&yzs4D4TaSUi;m+F{T_mC@rE52`QzQka6;IO?@mf6&b?!!jgW z_k=T2^L^e8^WWpTYLh_OGrPD;ZFtyHTVyw^86zKOJ9$%=wcQe9-TSm%7_;)SHR|)6 zf_Q%HLOpflp7{vvYEtRyz%7pYavZ{Myw?M)1M~}P0?^7QP3h#cTJHu?>LLG literal 0 HcmV?d00001 diff --git a/parley_tests/tests/draw.rs b/parley_tests/tests/draw.rs index b32e9891c..67166fc73 100644 --- a/parley_tests/tests/draw.rs +++ b/parley_tests/tests/draw.rs @@ -175,6 +175,8 @@ fn draw_colr_emoji() { /// Test COLR emoji with non printing variation selector 16 rendering across different hinting, /// per-glyph transform, and scale configurations. +/// +/// The default color emoji is different for each system, so only macOS was added for testing. 
#[cfg(all(target_os = "macos", feature = "system"))] #[test] fn draw_colr_emoji_with_non_printing_variation_selector_16() { @@ -189,9 +191,42 @@ fn draw_colr_emoji_with_non_printing_variation_selector_16() { test_with_configs(&mut env, |env| { let mut builder = env.ranged_builder(text); builder.push_default(StyleProperty::FontSize(24.0)); - builder.push_default(StyleProperty::FontFamily(FontFamily::named( - "Apple Color Emoji", - ))); + // Following + builder.push_default(StyleProperty::FontFamily( + parley::GenericFamily::Emoji.into(), + )); + builder.push( + StyleProperty::FontFamily(FontFamily::named("Noto Color Emoji")), + 0..9, + ); + + let mut layout = builder.build(text); + layout.break_all_lines(None); + layout.align(Alignment::Start, AlignmentOptions::default()); + layout + }); +} + +/// Test COLR emoji with non printing variation selector 16 rendering across different hinting, +/// per-glyph transform, and scale configurations. +/// +/// Should fall back to the system default color emoji. +/// +/// The default color emoji is different for each system, so only macOS was added for testing. 
+#[cfg(all(target_os = "macos", feature = "system"))] +#[test] +fn draw_colr_emoji_with_non_printing_variation_selector_16_without_default_font() { + let mut env = TestEnv::new(test_name!(), None); + env.set_tolerance(5.0); + + let collection = &mut env.font_context().collection; + collection.load_system_fonts(); + + let text = "\u{270c}\u{fe0f}\u{2705}\u{270c}\u{fe0f}"; + + test_with_configs(&mut env, |env| { + let mut builder = env.ranged_builder(text); + builder.push_default(StyleProperty::FontSize(24.0)); builder.push( StyleProperty::FontFamily(FontFamily::named("Noto Color Emoji")), 0..9, From ed40e9fc60baae32234ba65533d339ba99cc825e Mon Sep 17 00:00:00 2001 From: Fangdun Tsai Date: Thu, 7 May 2026 07:21:37 +0800 Subject: [PATCH 02/34] rename to is_emoji_presentation --- parley/src/analysis/cluster.rs | 6 +++--- parley/src/shape/mod.rs | 9 ++++----- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/parley/src/analysis/cluster.rs b/parley/src/analysis/cluster.rs index 22dccb52a..dc41f4423 100644 --- a/parley/src/analysis/cluster.rs +++ b/parley/src/analysis/cluster.rs @@ -52,8 +52,8 @@ pub(crate) struct Char { /// Indexes into the list of styles for the containing text run, to find the style applicable /// to this character. pub style_index: u16, - /// Whether the emoji with non-printing variation selector - pub is_emoji_with_non_printing_variation_selector: bool, + /// Whether the emoji presentation + pub is_emoji_presentation: bool, } pub(crate) type GlyphId = u16; @@ -354,7 +354,7 @@ impl<'a> Mapper<'a> { let mut mapped = 0; for (c, g) in self.chars.iter().zip(glyphs.iter_mut()) { // If the color emoji has a non-printing variation selector, ignore the variation selector. 
- if c.is_emoji_with_non_printing_variation_selector { + if c.is_emoji_presentation { break; } diff --git a/parley/src/shape/mod.rs b/parley/src/shape/mod.rs index abb6ef703..48a79b409 100644 --- a/parley/src/shape/mod.rs +++ b/parley/src/shape/mod.rs @@ -255,12 +255,11 @@ fn fill_cluster_in_place( // Its presentation depends on the platform and font. // // e.g. - // - `U+270C + U+FE0F`: `✌`, force basic presentation - // - `U+270C + U+FE0F`: `✌️`, force emoji presentation + // - VS-15: `U+270C + U+FE0F` - `✌`, render as text. + // - VS-16: `U+270C + U+FE0F` - `✌️`, render as emoji. // // - let is_emoji_with_non_printing_variation_selector = - is_emoji_or_pictograph && info.is_variation_selector(); + let is_emoji_presentation = is_emoji_or_pictograph && info.is_variation_selector(); let contributes_to_shaping = info.contributes_to_shaping(); if contributes_to_shaping { @@ -273,7 +272,7 @@ fn fill_cluster_in_place( glyph_id: 0, style_index: *style_index, is_control_character: info.is_control(), - is_emoji_with_non_printing_variation_selector, + is_emoji_presentation, }); } From 5f261167c99322c7a4b6592c4665f5796ee8f6e6 Mon Sep 17 00:00:00 2001 From: Fangdun Tsai Date: Thu, 7 May 2026 07:38:23 +0800 Subject: [PATCH 03/34] tweak test name --- ...lr_emoji_with_presentation_style-2x_hint.png} | Bin ...oji_with_presentation_style-2x_hint_skew.png} | Bin ..._emoji_with_presentation_style-2x_nohint.png} | Bin ...i_with_presentation_style-2x_nohint_skew.png} | Bin ..._colr_emoji_with_presentation_style-hint.png} | Bin ..._emoji_with_presentation_style-hint_skew.png} | Bin ...olr_emoji_with_presentation_style-nohint.png} | Bin ...moji_with_presentation_style-nohint_skew.png} | Bin ...yle_without_setting_default_font-2x_hint.png} | Bin ...ithout_setting_default_font-2x_hint_skew.png} | Bin ...e_without_setting_default_font-2x_nohint.png} | Bin ...hout_setting_default_font-2x_nohint_skew.png} | Bin ..._style_without_setting_default_font-hint.png} | Bin 
...e_without_setting_default_font-hint_skew.png} | Bin ...tyle_without_setting_default_font-nohint.png} | Bin ...without_setting_default_font-nohint_skew.png} | Bin parley_tests/tests/draw.rs | 15 ++++++++------- 17 files changed, 8 insertions(+), 7 deletions(-) rename parley_tests/snapshots/{draw_colr_emoji_with_non_printing_variation_selector_16-2x_hint.png => draw_colr_emoji_with_presentation_style-2x_hint.png} (100%) rename parley_tests/snapshots/{draw_colr_emoji_with_non_printing_variation_selector_16-2x_hint_skew.png => draw_colr_emoji_with_presentation_style-2x_hint_skew.png} (100%) rename parley_tests/snapshots/{draw_colr_emoji_with_non_printing_variation_selector_16-2x_nohint.png => draw_colr_emoji_with_presentation_style-2x_nohint.png} (100%) rename parley_tests/snapshots/{draw_colr_emoji_with_non_printing_variation_selector_16-2x_nohint_skew.png => draw_colr_emoji_with_presentation_style-2x_nohint_skew.png} (100%) rename parley_tests/snapshots/{draw_colr_emoji_with_non_printing_variation_selector_16-hint.png => draw_colr_emoji_with_presentation_style-hint.png} (100%) rename parley_tests/snapshots/{draw_colr_emoji_with_non_printing_variation_selector_16-hint_skew.png => draw_colr_emoji_with_presentation_style-hint_skew.png} (100%) rename parley_tests/snapshots/{draw_colr_emoji_with_non_printing_variation_selector_16-nohint.png => draw_colr_emoji_with_presentation_style-nohint.png} (100%) rename parley_tests/snapshots/{draw_colr_emoji_with_non_printing_variation_selector_16-nohint_skew.png => draw_colr_emoji_with_presentation_style-nohint_skew.png} (100%) rename parley_tests/snapshots/{draw_colr_emoji_with_non_printing_variation_selector_16_without_default_font-2x_hint.png => draw_colr_emoji_with_presentation_style_without_setting_default_font-2x_hint.png} (100%) rename parley_tests/snapshots/{draw_colr_emoji_with_non_printing_variation_selector_16_without_default_font-2x_hint_skew.png => 
draw_colr_emoji_with_presentation_style_without_setting_default_font-2x_hint_skew.png} (100%) rename parley_tests/snapshots/{draw_colr_emoji_with_non_printing_variation_selector_16_without_default_font-2x_nohint.png => draw_colr_emoji_with_presentation_style_without_setting_default_font-2x_nohint.png} (100%) rename parley_tests/snapshots/{draw_colr_emoji_with_non_printing_variation_selector_16_without_default_font-2x_nohint_skew.png => draw_colr_emoji_with_presentation_style_without_setting_default_font-2x_nohint_skew.png} (100%) rename parley_tests/snapshots/{draw_colr_emoji_with_non_printing_variation_selector_16_without_default_font-hint.png => draw_colr_emoji_with_presentation_style_without_setting_default_font-hint.png} (100%) rename parley_tests/snapshots/{draw_colr_emoji_with_non_printing_variation_selector_16_without_default_font-hint_skew.png => draw_colr_emoji_with_presentation_style_without_setting_default_font-hint_skew.png} (100%) rename parley_tests/snapshots/{draw_colr_emoji_with_non_printing_variation_selector_16_without_default_font-nohint.png => draw_colr_emoji_with_presentation_style_without_setting_default_font-nohint.png} (100%) rename parley_tests/snapshots/{draw_colr_emoji_with_non_printing_variation_selector_16_without_default_font-nohint_skew.png => draw_colr_emoji_with_presentation_style_without_setting_default_font-nohint_skew.png} (100%) diff --git a/parley_tests/snapshots/draw_colr_emoji_with_non_printing_variation_selector_16-2x_hint.png b/parley_tests/snapshots/draw_colr_emoji_with_presentation_style-2x_hint.png similarity index 100% rename from parley_tests/snapshots/draw_colr_emoji_with_non_printing_variation_selector_16-2x_hint.png rename to parley_tests/snapshots/draw_colr_emoji_with_presentation_style-2x_hint.png diff --git a/parley_tests/snapshots/draw_colr_emoji_with_non_printing_variation_selector_16-2x_hint_skew.png b/parley_tests/snapshots/draw_colr_emoji_with_presentation_style-2x_hint_skew.png similarity index 100% rename 
from parley_tests/snapshots/draw_colr_emoji_with_non_printing_variation_selector_16-2x_hint_skew.png rename to parley_tests/snapshots/draw_colr_emoji_with_presentation_style-2x_hint_skew.png diff --git a/parley_tests/snapshots/draw_colr_emoji_with_non_printing_variation_selector_16-2x_nohint.png b/parley_tests/snapshots/draw_colr_emoji_with_presentation_style-2x_nohint.png similarity index 100% rename from parley_tests/snapshots/draw_colr_emoji_with_non_printing_variation_selector_16-2x_nohint.png rename to parley_tests/snapshots/draw_colr_emoji_with_presentation_style-2x_nohint.png diff --git a/parley_tests/snapshots/draw_colr_emoji_with_non_printing_variation_selector_16-2x_nohint_skew.png b/parley_tests/snapshots/draw_colr_emoji_with_presentation_style-2x_nohint_skew.png similarity index 100% rename from parley_tests/snapshots/draw_colr_emoji_with_non_printing_variation_selector_16-2x_nohint_skew.png rename to parley_tests/snapshots/draw_colr_emoji_with_presentation_style-2x_nohint_skew.png diff --git a/parley_tests/snapshots/draw_colr_emoji_with_non_printing_variation_selector_16-hint.png b/parley_tests/snapshots/draw_colr_emoji_with_presentation_style-hint.png similarity index 100% rename from parley_tests/snapshots/draw_colr_emoji_with_non_printing_variation_selector_16-hint.png rename to parley_tests/snapshots/draw_colr_emoji_with_presentation_style-hint.png diff --git a/parley_tests/snapshots/draw_colr_emoji_with_non_printing_variation_selector_16-hint_skew.png b/parley_tests/snapshots/draw_colr_emoji_with_presentation_style-hint_skew.png similarity index 100% rename from parley_tests/snapshots/draw_colr_emoji_with_non_printing_variation_selector_16-hint_skew.png rename to parley_tests/snapshots/draw_colr_emoji_with_presentation_style-hint_skew.png diff --git a/parley_tests/snapshots/draw_colr_emoji_with_non_printing_variation_selector_16-nohint.png b/parley_tests/snapshots/draw_colr_emoji_with_presentation_style-nohint.png similarity index 100% rename from 
parley_tests/snapshots/draw_colr_emoji_with_non_printing_variation_selector_16-nohint.png rename to parley_tests/snapshots/draw_colr_emoji_with_presentation_style-nohint.png diff --git a/parley_tests/snapshots/draw_colr_emoji_with_non_printing_variation_selector_16-nohint_skew.png b/parley_tests/snapshots/draw_colr_emoji_with_presentation_style-nohint_skew.png similarity index 100% rename from parley_tests/snapshots/draw_colr_emoji_with_non_printing_variation_selector_16-nohint_skew.png rename to parley_tests/snapshots/draw_colr_emoji_with_presentation_style-nohint_skew.png diff --git a/parley_tests/snapshots/draw_colr_emoji_with_non_printing_variation_selector_16_without_default_font-2x_hint.png b/parley_tests/snapshots/draw_colr_emoji_with_presentation_style_without_setting_default_font-2x_hint.png similarity index 100% rename from parley_tests/snapshots/draw_colr_emoji_with_non_printing_variation_selector_16_without_default_font-2x_hint.png rename to parley_tests/snapshots/draw_colr_emoji_with_presentation_style_without_setting_default_font-2x_hint.png diff --git a/parley_tests/snapshots/draw_colr_emoji_with_non_printing_variation_selector_16_without_default_font-2x_hint_skew.png b/parley_tests/snapshots/draw_colr_emoji_with_presentation_style_without_setting_default_font-2x_hint_skew.png similarity index 100% rename from parley_tests/snapshots/draw_colr_emoji_with_non_printing_variation_selector_16_without_default_font-2x_hint_skew.png rename to parley_tests/snapshots/draw_colr_emoji_with_presentation_style_without_setting_default_font-2x_hint_skew.png diff --git a/parley_tests/snapshots/draw_colr_emoji_with_non_printing_variation_selector_16_without_default_font-2x_nohint.png b/parley_tests/snapshots/draw_colr_emoji_with_presentation_style_without_setting_default_font-2x_nohint.png similarity index 100% rename from parley_tests/snapshots/draw_colr_emoji_with_non_printing_variation_selector_16_without_default_font-2x_nohint.png rename to 
parley_tests/snapshots/draw_colr_emoji_with_presentation_style_without_setting_default_font-2x_nohint.png diff --git a/parley_tests/snapshots/draw_colr_emoji_with_non_printing_variation_selector_16_without_default_font-2x_nohint_skew.png b/parley_tests/snapshots/draw_colr_emoji_with_presentation_style_without_setting_default_font-2x_nohint_skew.png similarity index 100% rename from parley_tests/snapshots/draw_colr_emoji_with_non_printing_variation_selector_16_without_default_font-2x_nohint_skew.png rename to parley_tests/snapshots/draw_colr_emoji_with_presentation_style_without_setting_default_font-2x_nohint_skew.png diff --git a/parley_tests/snapshots/draw_colr_emoji_with_non_printing_variation_selector_16_without_default_font-hint.png b/parley_tests/snapshots/draw_colr_emoji_with_presentation_style_without_setting_default_font-hint.png similarity index 100% rename from parley_tests/snapshots/draw_colr_emoji_with_non_printing_variation_selector_16_without_default_font-hint.png rename to parley_tests/snapshots/draw_colr_emoji_with_presentation_style_without_setting_default_font-hint.png diff --git a/parley_tests/snapshots/draw_colr_emoji_with_non_printing_variation_selector_16_without_default_font-hint_skew.png b/parley_tests/snapshots/draw_colr_emoji_with_presentation_style_without_setting_default_font-hint_skew.png similarity index 100% rename from parley_tests/snapshots/draw_colr_emoji_with_non_printing_variation_selector_16_without_default_font-hint_skew.png rename to parley_tests/snapshots/draw_colr_emoji_with_presentation_style_without_setting_default_font-hint_skew.png diff --git a/parley_tests/snapshots/draw_colr_emoji_with_non_printing_variation_selector_16_without_default_font-nohint.png b/parley_tests/snapshots/draw_colr_emoji_with_presentation_style_without_setting_default_font-nohint.png similarity index 100% rename from parley_tests/snapshots/draw_colr_emoji_with_non_printing_variation_selector_16_without_default_font-nohint.png rename to 
parley_tests/snapshots/draw_colr_emoji_with_presentation_style_without_setting_default_font-nohint.png diff --git a/parley_tests/snapshots/draw_colr_emoji_with_non_printing_variation_selector_16_without_default_font-nohint_skew.png b/parley_tests/snapshots/draw_colr_emoji_with_presentation_style_without_setting_default_font-nohint_skew.png similarity index 100% rename from parley_tests/snapshots/draw_colr_emoji_with_non_printing_variation_selector_16_without_default_font-nohint_skew.png rename to parley_tests/snapshots/draw_colr_emoji_with_presentation_style_without_setting_default_font-nohint_skew.png diff --git a/parley_tests/tests/draw.rs b/parley_tests/tests/draw.rs index 67166fc73..7117607ab 100644 --- a/parley_tests/tests/draw.rs +++ b/parley_tests/tests/draw.rs @@ -173,13 +173,14 @@ fn draw_colr_emoji() { }); } -/// Test COLR emoji with non printing variation selector 16 rendering across different hinting, -/// per-glyph transform, and scale configurations. +/// Test COLR emoji rendering across different hinting, per-glyph transform, and scale configurations. +/// +/// The COLR emoji with presentation style(VS-16). /// /// The default color emoji is different for each system, so only macOS was added for testing. #[cfg(all(target_os = "macos", feature = "system"))] #[test] -fn draw_colr_emoji_with_non_printing_variation_selector_16() { +fn draw_colr_emoji_with_presentation_style() { let mut env = TestEnv::new(test_name!(), None); env.set_tolerance(5.0); @@ -207,15 +208,15 @@ fn draw_colr_emoji_with_non_printing_variation_selector_16() { }); } -/// Test COLR emoji with non printing variation selector 16 rendering across different hinting, -/// per-glyph transform, and scale configurations. +/// Test COLR emoji rendering across different hinting, per-glyph transform, and scale configurations. /// -/// Should fall back to the system default color emoji. 
+/// The COLR emoji with presentation style(VS-16) without setting the default font, +/// and should fallback to the system default color emoji. /// /// The default color emoji is different for each system, so only macOS was added for testing. #[cfg(all(target_os = "macos", feature = "system"))] #[test] -fn draw_colr_emoji_with_non_printing_variation_selector_16_without_default_font() { +fn draw_colr_emoji_with_presentation_style_without_setting_default_font() { let mut env = TestEnv::new(test_name!(), None); env.set_tolerance(5.0); From 749c56e964fe7433ccbc3a6842f504d7dc38643a Mon Sep 17 00:00:00 2001 From: Fangdun Tsai Date: Thu, 7 May 2026 07:47:24 +0800 Subject: [PATCH 04/34] update comment --- parley/src/analysis/cluster.rs | 2 +- parley/src/shape/mod.rs | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/parley/src/analysis/cluster.rs b/parley/src/analysis/cluster.rs index dc41f4423..954cc156c 100644 --- a/parley/src/analysis/cluster.rs +++ b/parley/src/analysis/cluster.rs @@ -353,7 +353,7 @@ impl<'a> Mapper<'a> { } let mut mapped = 0; for (c, g) in self.chars.iter().zip(glyphs.iter_mut()) { - // If the color emoji has a non-printing variation selector, ignore the variation selector. + // If the color emoji has a presentation style, ignore the variation selector. if c.is_emoji_presentation { break; } diff --git a/parley/src/shape/mod.rs b/parley/src/shape/mod.rs index 48a79b409..ae0e39a38 100644 --- a/parley/src/shape/mod.rs +++ b/parley/src/shape/mod.rs @@ -251,8 +251,7 @@ fn fill_cluster_in_place( // regional indicators, subdivision flag tag sequences, skin tone modifiers // See also: https://github.com/google/emoji-segmenter - // If the color emoji has a non-printing variation selector, ignore the variation selector. - // Its presentation depends on the platform and font. + // If the color emoji has a presentation style, ignore the variation selector. // // e.g. // - VS-15: `U+270C + U+FE0F` - `✌`, render as text. 
From 2ccc1c3ab81d006c036150bd236cb79f5e6985a4 Mon Sep 17 00:00:00 2001 From: Fangdun Tsai Date: Thu, 7 May 2026 12:06:20 +0800 Subject: [PATCH 05/34] fix text presentation unicode --- parley/src/shape/mod.rs | 8 ++++---- parley_tests/tests/draw.rs | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/parley/src/shape/mod.rs b/parley/src/shape/mod.rs index ae0e39a38..b916c1de6 100644 --- a/parley/src/shape/mod.rs +++ b/parley/src/shape/mod.rs @@ -251,12 +251,12 @@ fn fill_cluster_in_place( // regional indicators, subdivision flag tag sequences, skin tone modifiers // See also: https://github.com/google/emoji-segmenter - // If the color emoji has a presentation style, ignore the variation selector. + // Unicode color emoji has two variants: // - // e.g. - // - VS-15: `U+270C + U+FE0F` - `✌`, render as text. - // - VS-16: `U+270C + U+FE0F` - `✌️`, render as emoji. + // * Emoji presentation(VS16): `U+270C + U+FE0F` - `✌️` + // * Text presentation(VS15): `U+270C + U+FE0E` - `✌` // + // // let is_emoji_presentation = is_emoji_or_pictograph && info.is_variation_selector(); diff --git a/parley_tests/tests/draw.rs b/parley_tests/tests/draw.rs index 7117607ab..38a7caa07 100644 --- a/parley_tests/tests/draw.rs +++ b/parley_tests/tests/draw.rs @@ -175,7 +175,7 @@ fn draw_colr_emoji() { /// Test COLR emoji rendering across different hinting, per-glyph transform, and scale configurations. /// -/// The COLR emoji with presentation style(VS-16). +/// The COLR emoji with presentation style(VS16). /// /// The default color emoji is different for each system, so only macOS was added for testing. #[cfg(all(target_os = "macos", feature = "system"))] @@ -210,8 +210,8 @@ fn draw_colr_emoji_with_presentation_style() { /// Test COLR emoji rendering across different hinting, per-glyph transform, and scale configurations. /// -/// The COLR emoji with presentation style(VS-16) without setting the default font, -/// and should fallback to the system default color emoji. 
+/// The COLR emoji with presentation style(VS16) without setting the default font, +/// and should fallback to the system default color emoji font. /// /// The default color emoji is different for each system, so only macOS was added for testing. #[cfg(all(target_os = "macos", feature = "system"))] From 30251cb9f23c76b400d3399529e572b8a18dde8e Mon Sep 17 00:00:00 2001 From: Fangdun Tsai Date: Thu, 7 May 2026 12:08:54 +0800 Subject: [PATCH 06/34] adjust name --- parley/src/analysis/cluster.rs | 6 +++--- parley/src/shape/mod.rs | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/parley/src/analysis/cluster.rs b/parley/src/analysis/cluster.rs index 954cc156c..520bca174 100644 --- a/parley/src/analysis/cluster.rs +++ b/parley/src/analysis/cluster.rs @@ -52,8 +52,8 @@ pub(crate) struct Char { /// Indexes into the list of styles for the containing text run, to find the style applicable /// to this character. pub style_index: u16, - /// Whether the emoji presentation - pub is_emoji_presentation: bool, + /// Whether the emoji presentation style + pub is_emoji_presentation_style: bool, } pub(crate) type GlyphId = u16; @@ -354,7 +354,7 @@ impl<'a> Mapper<'a> { let mut mapped = 0; for (c, g) in self.chars.iter().zip(glyphs.iter_mut()) { // If the color emoji has a presentation style, ignore the variation selector. 
- if c.is_emoji_presentation { + if c.is_emoji_presentation_style { break; } diff --git a/parley/src/shape/mod.rs b/parley/src/shape/mod.rs index b916c1de6..a60b2c777 100644 --- a/parley/src/shape/mod.rs +++ b/parley/src/shape/mod.rs @@ -258,7 +258,7 @@ fn fill_cluster_in_place( // // // - let is_emoji_presentation = is_emoji_or_pictograph && info.is_variation_selector(); + let is_emoji_presentation_style = is_emoji_or_pictograph && info.is_variation_selector(); let contributes_to_shaping = info.contributes_to_shaping(); if contributes_to_shaping { @@ -271,7 +271,7 @@ fn fill_cluster_in_place( glyph_id: 0, style_index: *style_index, is_control_character: info.is_control(), - is_emoji_presentation, + is_emoji_presentation_style, }); } From ca7b822f065336a2c24f7f7a40137d4870623597 Mon Sep 17 00:00:00 2001 From: Fangdun Tsai Date: Thu, 7 May 2026 12:59:40 +0800 Subject: [PATCH 07/34] update test --- parley/src/analysis/cluster.rs | 6 +++--- parley/src/shape/mod.rs | 6 +++--- parley_tests/tests/draw.rs | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/parley/src/analysis/cluster.rs b/parley/src/analysis/cluster.rs index 520bca174..0e2bcc6ef 100644 --- a/parley/src/analysis/cluster.rs +++ b/parley/src/analysis/cluster.rs @@ -52,8 +52,8 @@ pub(crate) struct Char { /// Indexes into the list of styles for the containing text run, to find the style applicable /// to this character. pub style_index: u16, - /// Whether the emoji presentation style - pub is_emoji_presentation_style: bool, + /// Whether the emoji presentation selector + pub is_emoji_presentation_selector: bool, } pub(crate) type GlyphId = u16; @@ -354,7 +354,7 @@ impl<'a> Mapper<'a> { let mut mapped = 0; for (c, g) in self.chars.iter().zip(glyphs.iter_mut()) { // If the color emoji has a presentation style, ignore the variation selector. 
- if c.is_emoji_presentation_style { + if c.is_emoji_presentation_selector { break; } diff --git a/parley/src/shape/mod.rs b/parley/src/shape/mod.rs index a60b2c777..d634c6f87 100644 --- a/parley/src/shape/mod.rs +++ b/parley/src/shape/mod.rs @@ -251,14 +251,14 @@ fn fill_cluster_in_place( // regional indicators, subdivision flag tag sequences, skin tone modifiers // See also: https://github.com/google/emoji-segmenter - // Unicode color emoji has two variants: + // Unicode color emoji has two presentation styles: // // * Emoji presentation(VS16): `U+270C + U+FE0F` - `✌️` // * Text presentation(VS15): `U+270C + U+FE0E` - `✌` // // // - let is_emoji_presentation_style = is_emoji_or_pictograph && info.is_variation_selector(); + let is_emoji_presentation_selector = is_emoji_or_pictograph && info.is_variation_selector(); let contributes_to_shaping = info.contributes_to_shaping(); if contributes_to_shaping { @@ -271,7 +271,7 @@ fn fill_cluster_in_place( glyph_id: 0, style_index: *style_index, is_control_character: info.is_control(), - is_emoji_presentation_style, + is_emoji_presentation_selector, }); } diff --git a/parley_tests/tests/draw.rs b/parley_tests/tests/draw.rs index 38a7caa07..83df0894b 100644 --- a/parley_tests/tests/draw.rs +++ b/parley_tests/tests/draw.rs @@ -210,7 +210,7 @@ fn draw_colr_emoji_with_presentation_style() { /// Test COLR emoji rendering across different hinting, per-glyph transform, and scale configurations. /// -/// The COLR emoji with presentation style(VS16) without setting the default font, +/// The COLR emoji with presentation style without setting the default font, /// and should fallback to the system default color emoji font. /// /// The default color emoji is different for each system, so only macOS was added for testing. 
@@ -223,7 +223,7 @@ fn draw_colr_emoji_with_presentation_style_without_setting_default_font() { let collection = &mut env.font_context().collection; collection.load_system_fonts(); - let text = "\u{270c}\u{fe0f}\u{2705}\u{270c}\u{fe0f}"; + let text = "\u{270c}\u{fe0f}\u{2705}\u{270c}\u{fe0e}"; test_with_configs(&mut env, |env| { let mut builder = env.ranged_builder(text); From 48d58d601ba14327d7d45dedfaf7648f7a70bd3d Mon Sep 17 00:00:00 2001 From: Fangdun Tsai Date: Thu, 7 May 2026 17:14:24 +0800 Subject: [PATCH 08/34] should update mapped --- parley/src/analysis/cluster.rs | 21 +++++++++++---------- parley_tests/tests/draw.rs | 2 +- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/parley/src/analysis/cluster.rs b/parley/src/analysis/cluster.rs index 0e2bcc6ef..50cc585e1 100644 --- a/parley/src/analysis/cluster.rs +++ b/parley/src/analysis/cluster.rs @@ -353,22 +353,23 @@ impl<'a> Mapper<'a> { } let mut mapped = 0; for (c, g) in self.chars.iter().zip(glyphs.iter_mut()) { + *g = f(c.ch); + // If the color emoji has a presentation style, ignore the variation selector. 
if c.is_emoji_presentation_selector { - break; + mapped += 1; + continue; } - if !c.contributes_to_shaping { - *g = f(c.ch); - if self.map_len == 1 { - mapped += 1; - } - } else { - let gid = f(c.ch); - *g = gid; - if gid != 0 { + if c.contributes_to_shaping { + if *g != 0 { mapped += 1; } + continue; + } + + if self.map_len == 1 { + mapped += 1; } } let ratio = mapped as f32 / self.map_len as f32; diff --git a/parley_tests/tests/draw.rs b/parley_tests/tests/draw.rs index 83df0894b..afbf9496a 100644 --- a/parley_tests/tests/draw.rs +++ b/parley_tests/tests/draw.rs @@ -223,7 +223,7 @@ fn draw_colr_emoji_with_presentation_style_without_setting_default_font() { let collection = &mut env.font_context().collection; collection.load_system_fonts(); - let text = "\u{270c}\u{fe0f}\u{2705}\u{270c}\u{fe0e}"; + let text = "\u{270c}\u{fe0f}\u{2705}\u{270c}\u{fe0e}\u{fe0e}\u{fe0f}"; test_with_configs(&mut env, |env| { let mut builder = env.ranged_builder(text); From 7357be49b811451f8ab1da0d8e61c5880947fa60 Mon Sep 17 00:00:00 2001 From: Fangdun Tsai Date: Sun, 10 May 2026 00:08:28 +0800 Subject: [PATCH 09/34] impl emoji segmenter --- parley/src/analysis/cluster.rs | 6 +- parley/src/analysis/mod.rs | 28 +- parley/src/emoji/mod.rs | 394 ++++++++++++ parley/src/lib.rs | 1 + parley/src/shape/mod.rs | 54 +- parley/src/tests/mod.rs | 1 + parley/src/tests/test_analysis.rs | 2 +- parley/src/tests/test_emoji_segmenters.rs | 725 ++++++++++++++++++++++ parley_tests/tests/draw.rs | 2 +- 9 files changed, 1193 insertions(+), 20 deletions(-) create mode 100644 parley/src/emoji/mod.rs create mode 100644 parley/src/tests/test_emoji_segmenters.rs diff --git a/parley/src/analysis/cluster.rs b/parley/src/analysis/cluster.rs index 50cc585e1..4b4ae1fec 100644 --- a/parley/src/analysis/cluster.rs +++ b/parley/src/analysis/cluster.rs @@ -4,7 +4,7 @@ use alloc::vec::Vec; use icu_normalizer::properties::Decomposed; -use crate::analysis::AnalysisDataSources; +use crate::{analysis::AnalysisDataSources, 
emoji::ScannedEmojiPresetation}; /// The maximum number of characters in a single cluster. const MAX_CLUSTER_SIZE: usize = 32; @@ -12,11 +12,11 @@ const MAX_CLUSTER_SIZE: usize = 32; #[derive(Debug, Default)] pub(crate) struct CharCluster { pub chars: Vec, - pub is_emoji: bool, pub map_len: u8, pub start: u32, pub end: u32, pub force_normalize: bool, + pub scanned_emoji_presetation: ScannedEmojiPresetation, comp: Form, decomp: Form, form: FormKind, @@ -95,7 +95,6 @@ pub(crate) enum Status { impl CharCluster { pub(crate) fn clear(&mut self) { self.chars.clear(); - self.is_emoji = false; self.map_len = 0; self.start = 0; self.end = 0; @@ -104,6 +103,7 @@ impl CharCluster { self.decomp.clear(); self.form = FormKind::Original; self.best_ratio = 0.; + self.scanned_emoji_presetation.clear(); } #[inline(always)] diff --git a/parley/src/analysis/mod.rs b/parley/src/analysis/mod.rs index 460e78d96..f165fac1c 100644 --- a/parley/src/analysis/mod.rs +++ b/parley/src/analysis/mod.rs @@ -13,9 +13,13 @@ use icu_normalizer::properties::{ CanonicalComposition, CanonicalCompositionBorrowed, CanonicalDecomposition, CanonicalDecompositionBorrowed, }; -use icu_properties::props::{BidiMirroringGlyph, GeneralCategory, GraphemeClusterBreak, Script}; +use icu_properties::props::{ + BidiMirroringGlyph, EmojiComponent, EmojiModifier, EmojiModifierBase, EmojiPresentation, + GeneralCategory, GraphemeClusterBreak, Script, +}; use icu_properties::{ - CodePointMapData, CodePointMapDataBorrowed, PropertyNamesShort, PropertyNamesShortBorrowed, + CodePointMapData, CodePointMapDataBorrowed, CodePointSetData, CodePointSetDataBorrowed, + PropertyNamesShort, PropertyNamesShortBorrowed, }; use icu_segmenter::options::{LineBreakOptions, LineBreakWordOption, WordBreakInvariantOptions}; use icu_segmenter::{ @@ -92,6 +96,26 @@ impl AnalysisDataSources { fn brackets(&self) -> CodePointMapDataBorrowed<'_, BidiMirroringGlyph> { const { CodePointMapData::new() } } + + #[inline(always)] + pub(crate) fn 
//! Emoji presentation scanning for a single character cluster.
//!
//! The core algorithm is based on the Ragel grammar of the [Emoji Segmenter] and follows
//! [UTS #51] (Unicode Technical Standard #51).
//!
//! [Emoji Segmenter]: https://github.com/google/emoji-segmenter
//! [UTS #51]: https://www.unicode.org/reports/tr51/

/// Bit set of the emoji-related properties of one code point.
///
/// Each property occupies one bit; see the `*_SHIFT` constants for the layout.
#[derive(Clone, Copy, Default, Debug)]
pub(crate) struct EmojiFlags(u32);

impl EmojiFlags {
    const EMOJI_SHIFT: u32 = 0;
    const EMOJI_MODIFIER_SHIFT: u32 = 1;
    const EMOJI_MODIFIER_BASE_SHIFT: u32 = 2;
    const EMOJI_PRESENTATION_SHIFT: u32 = 3;
    const EMOJI_COMPONENT_SHIFT: u32 = 4;
    const REGIONAL_INDICATOR_SHIFT: u32 = 5;

    const EMOJI_MASK: u32 = 1 << Self::EMOJI_SHIFT;
    const EMOJI_MODIFIER_MASK: u32 = 1 << Self::EMOJI_MODIFIER_SHIFT;
    const EMOJI_MODIFIER_BASE_MASK: u32 = 1 << Self::EMOJI_MODIFIER_BASE_SHIFT;
    const EMOJI_PRESENTATION_MASK: u32 = 1 << Self::EMOJI_PRESENTATION_SHIFT;
    // Only read by `is_emoji_component`, which no category rule uses yet.
    #[allow(unused)]
    const EMOJI_COMPONENT_MASK: u32 = 1 << Self::EMOJI_COMPONENT_SHIFT;
    const REGIONAL_INDICATOR_MASK: u32 = 1 << Self::REGIONAL_INDICATOR_SHIFT;

    /// Creates an empty flag set (no property set).
    #[inline(always)]
    pub(crate) const fn new() -> Self {
        Self(0)
    }

    /// Sets the "is emoji" bit when `is_emoji` is `true`; never clears it.
    #[inline(always)]
    pub(crate) const fn with_emoji(mut self, is_emoji: bool) -> Self {
        self.0 |= (is_emoji as u32) << Self::EMOJI_SHIFT;
        self
    }

    /// Sets the remaining property bits; each `true` argument sets its bit, `false` leaves
    /// the bit untouched.
    #[inline(always)]
    pub(crate) const fn with_extra(
        mut self,
        is_emoji_modifier: bool,
        is_emoji_modifier_base: bool,
        is_emoji_presentation: bool,
        is_emoji_component: bool,
        is_regional_indicator: bool,
    ) -> Self {
        self.0 |= (is_emoji_modifier as u32) << Self::EMOJI_MODIFIER_SHIFT;
        self.0 |= (is_emoji_modifier_base as u32) << Self::EMOJI_MODIFIER_BASE_SHIFT;
        self.0 |= (is_emoji_presentation as u32) << Self::EMOJI_PRESENTATION_SHIFT;
        self.0 |= (is_emoji_component as u32) << Self::EMOJI_COMPONENT_SHIFT;
        self.0 |= (is_regional_indicator as u32) << Self::REGIONAL_INDICATOR_SHIFT;
        self
    }

    /// Returns `true` if the "is emoji" bit is set.
    #[inline(always)]
    pub(crate) const fn is_emoji(&self) -> bool {
        self.0 & Self::EMOJI_MASK != 0
    }

    /// Returns `true` if the emoji-modifier bit is set.
    #[inline(always)]
    pub(crate) const fn is_emoji_modifier(&self) -> bool {
        self.0 & Self::EMOJI_MODIFIER_MASK != 0
    }

    /// Returns `true` if the emoji-modifier-base bit is set.
    #[inline(always)]
    pub(crate) const fn is_emoji_modifier_base(&self) -> bool {
        self.0 & Self::EMOJI_MODIFIER_BASE_MASK != 0
    }

    /// Returns `true` if the emoji-presentation bit is set.
    #[inline(always)]
    pub(crate) const fn is_emoji_presentation(&self) -> bool {
        self.0 & Self::EMOJI_PRESENTATION_MASK != 0
    }

    /// Returns `true` if the emoji-component bit is set.
    // Not consulted by `EmojiSegmentationCategory::from_codepoint`; kept for completeness.
    #[allow(unused)]
    #[inline(always)]
    pub(crate) const fn is_emoji_component(&self) -> bool {
        self.0 & Self::EMOJI_COMPONENT_MASK != 0
    }

    /// Returns `true` if the regional-indicator bit is set.
    #[inline(always)]
    pub(crate) const fn is_regional_indicator(&self) -> bool {
        self.0 & Self::REGIONAL_INDICATOR_MASK != 0
    }
}

/// Token classes consumed by [`scan_emoji_presetation`].
#[repr(u8)]
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum EmojiSegmentationCategory {
    Emoji = 0,
    EmojiTextPresentation,
    EmojiEmojiPresentation,
    EmojiModifierBaseText,
    EmojiModifierBaseEmoji,
    EmojiModifier,
    RegionalIndicator,
    KeycapBase,
    CombiningEnclosingKeycap,
    CombiningEnclosingCircleBackslash,
    Zwj,
    Vs15,
    Vs16,
    TagBase,
    TagSequence,
    TagTerm,
    None,
}

impl EmojiSegmentationCategory {
    /// Classifies the code point `cp`.
    ///
    /// Code points with a fixed role (keycap bases, ZWJ, variation selectors, tag
    /// characters, ...) are matched by value; everything else is classified from `flags`,
    /// checked in this order: modifier base, modifier, regional indicator, emoji
    /// presentation, emoji. Returns [`Self::None`] when nothing applies.
    #[inline(always)]
    pub(crate) const fn from_codepoint(cp: u32, flags: EmojiFlags) -> Self {
        match cp {
            // '0'..='9', '#', '*'
            0x30..=0x39 | 0x23 | 0x2A => Self::KeycapBase,
            0x200D => Self::Zwj,
            0x20E0 => Self::CombiningEnclosingCircleBackslash,
            0x20E3 => Self::CombiningEnclosingKeycap,
            0xFE0E => Self::Vs15,
            0xFE0F => Self::Vs16,
            0x1F3F4 => Self::TagBase,
            // Tag digits and tag small letters. Both ranges are inclusive: U+E007A
            // (TAG LATIN SMALL LETTER Z) is a valid tag character.
            0xE0030..=0xE0039 | 0xE0061..=0xE007A => Self::TagSequence,
            0xE007F => Self::TagTerm,
            _ => {
                if flags.is_emoji_modifier_base() {
                    if flags.is_emoji_presentation() {
                        return Self::EmojiModifierBaseEmoji;
                    }
                    return Self::EmojiModifierBaseText;
                }

                if flags.is_emoji_modifier() {
                    return Self::EmojiModifier;
                }

                if flags.is_regional_indicator() {
                    return Self::RegionalIndicator;
                }

                if flags.is_emoji_presentation() {
                    return Self::EmojiEmojiPresentation;
                }

                // Emoji presentation was handled above, so an emoji that reaches this
                // point always defaults to text presentation.
                if flags.is_emoji() {
                    return Self::EmojiTextPresentation;
                }

                Self::None
            }
        }
    }

    /// `const`-compatible equality (the derived `PartialEq` cannot be used in `const fn`).
    const fn eq(self, other: Self) -> bool {
        self as u8 == other as u8
    }
}

/// Result of scanning one cluster with [`scan_emoji_presetation`].
#[derive(Clone, Copy, Default, PartialEq, Eq, Debug)]
pub(crate) struct ScannedEmojiPresetation {
    /// Whether the cluster was recognized as an emoji sequence.
    pub is_emoji: bool,
    /// Whether the cluster is a presentation or keycap sequence carrying an explicit
    /// VS15/VS16. ZWJ sequences always report `false`.
    pub has_vs: bool,
}

impl ScannedEmojiPresetation {
    /// Returns whether the cluster was recognized as an emoji sequence.
    pub(crate) fn is_emoji(&self) -> bool {
        self.is_emoji
    }

    /// Resets both fields to `false`.
    pub(crate) fn clear(&mut self) {
        self.is_emoji = false;
        self.has_vs = false;
    }
}

/// Shorthand constructor used by the scanner.
#[inline(always)]
const fn scanned(is_emoji: bool, has_vs: bool) -> ScannedEmojiPresetation {
    ScannedEmojiPresetation { is_emoji, has_vs }
}

/// Scans the categories of one cluster and decides its emoji presentation.
///
/// The rules are tried in this order:
///
/// 1. text presentation sequences (`any_emoji VS15`, keycap with VS15) -> not emoji;
/// 2. a single emoji-presentation code point, unqualified keycaps, `emoji U+20E0`,
///    flag sequences and tag sequences -> emoji;
/// 3. emoji presentation sequences (`any_emoji VS16`, keycap with VS16) -> emoji;
/// 4. modifier sequences and ZWJ sequences -> emoji.
///
/// An empty slice yields `is_emoji == false`. A tail that does not match the ZWJ grammar,
/// including a ZWJ with nothing after it, leaves the sequence unmatched; the cluster is
/// then reported as emoji only if it starts with an emoji presentation sequence or a
/// modifier sequence.
pub(crate) const fn scan_emoji_presetation(
    categories: &[EmojiSegmentationCategory],
) -> ScannedEmojiPresetation {
    use EmojiSegmentationCategory as Category;

    let len = categories.len();

    if len == 0 {
        return scanned(false, false);
    }

    let (is_any_emoji, is_emoji_modifier_base, is_emoji_presentation) =
        emoji_matches(categories[0]);

    // Give the VS15 sequences higher priority than the emoji rules below.
    //
    // text_emoji_run_with_vs
    let is_text_emoji_presentation_sequence =
        is_any_emoji && len >= 2 && Category::Vs15.eq(categories[1]);
    if (is_text_emoji_presentation_sequence && len == 2)
        || is_text_emoji_keycap_sequence(categories)
    {
        return scanned(false, true);
    }

    // emoji_run
    if is_emoji_presentation && len == 1 {
        return scanned(true, false);
    }

    if is_unqualified_keycap_sequence(categories) {
        return scanned(true, false);
    }

    let is_emoji_combining_enclosing_circle_backslash_sequence = is_any_emoji
        && len == 2
        && Category::CombiningEnclosingCircleBackslash.eq(categories[1]);
    if is_emoji_combining_enclosing_circle_backslash_sequence {
        return scanned(true, false);
    }

    if is_emoji_flag_sequence(categories) {
        return scanned(true, false);
    }

    // TAG_BASE TAG_SEQUENCE+ TAG_TERM;
    if is_emoji_tag_sequence(categories) {
        return scanned(true, false);
    }

    // emoji_run_with_vs
    let is_emoji_presentation_sequence =
        is_any_emoji && len >= 2 && Category::Vs16.eq(categories[1]);
    if (is_emoji_presentation_sequence && len == 2) || is_emoji_keycap_sequence(categories) {
        return scanned(true, true);
    }

    let is_emoji_modifier_sequence =
        is_emoji_modifier_base && len >= 2 && Category::EmojiModifier.eq(categories[1]);
    if is_emoji_modifier_sequence && len == 2 {
        return scanned(true, false);
    }

    // Index of the first category after the leading ZWJ element.
    let mut cursor = if is_emoji_presentation_sequence || is_emoji_modifier_sequence {
        2
    } else if is_any_emoji {
        1
    } else {
        len
    };

    // fast path
    if cursor == len {
        return scanned(false, is_text_emoji_presentation_sequence);
    }

    // zwj sequences
    //
    // emoji_zwj_element = emoji_presentation_sequence | emoji_modifier_sequence | any_emoji
    // emoji_zwj_element (zwj emoji_zwj_element)+
    while cursor < len && Category::Zwj.eq(categories[cursor]) {
        let element = cursor + 1;

        // A ZWJ with nothing after it cannot start another element. Stop here, leaving
        // `cursor < len`, so it is handled like any other unmatched tail instead of
        // indexing past the end of `categories`.
        if element == len {
            break;
        }

        let (is_any_emoji, is_emoji_modifier_base, _) = emoji_matches(categories[element]);

        if element + 1 < len {
            let is_emoji_presentation_sequence =
                is_any_emoji && Category::Vs16.eq(categories[element + 1]);
            if is_emoji_presentation_sequence {
                cursor = element + 2;
                continue;
            }

            let is_emoji_modifier_sequence = is_emoji_modifier_base
                && Category::EmojiModifier.eq(categories[element + 1]);
            if is_emoji_modifier_sequence {
                cursor = element + 2;
                continue;
            }
        }

        // A plain emoji is consumed; anything else is left for the loop condition to
        // inspect, which ends the scan unless it is another ZWJ.
        cursor = if is_any_emoji { element + 1 } else { element };
    }

    scanned(
        cursor == len || is_emoji_presentation_sequence || is_emoji_modifier_sequence,
        false,
    )
}

/// Extracts the emoji category flags from the given category.
///
/// `is_any_emoji`:
///   EmojiTextPresentation | EmojiEmojiPresentation | KeycapBase |
///   EmojiModifierBaseText | EmojiModifierBaseEmoji | TagBase | Emoji
///
/// `is_emoji_modifier_base`: EmojiModifierBaseText | EmojiModifierBaseEmoji
///
/// `is_emoji_presentation`:
///   EmojiEmojiPresentation | TagBase | EmojiModifierBaseEmoji |
///   EmojiModifier | RegionalIndicator
///
/// Returns `(is_any_emoji, is_emoji_modifier_base, is_emoji_presentation)`
#[inline(always)]
const fn emoji_matches(category: EmojiSegmentationCategory) -> (bool, bool, bool) {
    use EmojiSegmentationCategory::*;

    match category {
        EmojiTextPresentation | KeycapBase | Emoji => (true, false, false),

        EmojiEmojiPresentation | TagBase => (true, false, true),

        EmojiModifierBaseText => (true, true, false),
        EmojiModifierBaseEmoji => (true, true, true),

        EmojiModifier | RegionalIndicator => (false, false, true),

        _ => (false, false, false),
    }
}

/// Matches exactly `KEYCAP_BASE VS15 COMBINING_ENCLOSING_KEYCAP`.
#[inline(always)]
const fn is_text_emoji_keycap_sequence(categories: &[EmojiSegmentationCategory]) -> bool {
    categories.len() == 3
        && EmojiSegmentationCategory::KeycapBase.eq(categories[0])
        && EmojiSegmentationCategory::Vs15.eq(categories[1])
        && EmojiSegmentationCategory::CombiningEnclosingKeycap.eq(categories[2])
}

/// Matches exactly `KEYCAP_BASE VS16 COMBINING_ENCLOSING_KEYCAP`.
#[inline(always)]
const fn is_emoji_keycap_sequence(categories: &[EmojiSegmentationCategory]) -> bool {
    categories.len() == 3
        && EmojiSegmentationCategory::KeycapBase.eq(categories[0])
        && EmojiSegmentationCategory::Vs16.eq(categories[1])
        && EmojiSegmentationCategory::CombiningEnclosingKeycap.eq(categories[2])
}

/// Matches exactly two regional indicators.
#[inline(always)]
const fn is_emoji_flag_sequence(categories: &[EmojiSegmentationCategory]) -> bool {
    categories.len() == 2
        && EmojiSegmentationCategory::RegionalIndicator.eq(categories[0])
        && EmojiSegmentationCategory::RegionalIndicator.eq(categories[1])
}

/// Matches `TAG_BASE TAG_SEQUENCE+ TAG_TERM`.
///
/// At least one tag character is required between the base and the terminator, so the
/// shortest accepted slice has three entries.
#[inline(always)]
const fn is_emoji_tag_sequence(categories: &[EmojiSegmentationCategory]) -> bool {
    let len = categories.len();

    // The length check also keeps `len - 1` below from underflowing on an empty slice.
    if len < 3
        || !EmojiSegmentationCategory::TagBase.eq(categories[0])
        || !EmojiSegmentationCategory::TagTerm.eq(categories[len - 1])
    {
        return false;
    }

    let mut i = 1;
    while i < len - 1 {
        if !EmojiSegmentationCategory::TagSequence.eq(categories[i]) {
            return false;
        }
        i += 1;
    }

    true
}

/// Matches exactly `KEYCAP_BASE COMBINING_ENCLOSING_KEYCAP` (no variation selector).
#[inline(always)]
const fn is_unqualified_keycap_sequence(categories: &[EmojiSegmentationCategory]) -> bool {
    categories.len() == 2
        && EmojiSegmentationCategory::KeycapBase.eq(categories[0])
        && EmojiSegmentationCategory::CombiningEnclosingKeycap.eq(categories[1])
}
CharInfo}; use crate::convert::script_to_harfrust; +use crate::emoji::{EmojiFlags, EmojiSegmentationCategory, scan_emoji_presetation}; use crate::inline_box::InlineBox; use crate::lru_cache::LruCache; use crate::util::nearly_eq; @@ -229,36 +230,57 @@ fn fill_cluster_in_place( item_infos_iter: &mut core::slice::Iter<'_, (CharInfo, u16)>, code_unit_offset_in_string: &mut usize, char_cluster: &mut CharCluster, + analysis_data_sources: &AnalysisDataSources, ) { // Reset cluster but keep allocation char_cluster.clear(); let mut force_normalize = false; - let mut is_emoji_or_pictograph = false; let mut map_len: u8 = 0; let start = *code_unit_offset_in_string as u32; + let mut is_emoji = false; + let mut emoji_segmentations = Vec::with_capacity(segment_text.char_indices().count()); + for ((_, ch), (info, style_index)) in segment_text.char_indices().zip(item_infos_iter.by_ref()) { + *code_unit_offset_in_string += ch.len_utf8(); force_normalize |= info.force_normalize(); + // TODO - make emoji detection more complete, as per (except using composite Trie tables as // much as possible: // https://github.com/conor-93/parley/blob/4637d826732a1a82bbb3c904c7f47a16a21cceec/parley/src/shape/mod.rs#L221-L269 - is_emoji_or_pictograph |= info.is_emoji_or_pictograph(); - *code_unit_offset_in_string += ch.len_utf8(); // TODO: Explore ignoring other modifiers in determining `contributes_to_shaping`: // regional indicators, subdivision flag tag sequences, skin tone modifiers // See also: https://github.com/google/emoji-segmenter - // Unicode color emoji has two presentation styles: - // - // * Emoji presentation(VS16): `U+270C + U+FE0F` - `✌️` - // * Text presentation(VS15): `U+270C + U+FE0E` - `✌` - // - // - // - let is_emoji_presentation_selector = is_emoji_or_pictograph && info.is_variation_selector(); + is_emoji |= info.is_emoji_or_pictograph(); + + let mut is_emoji_presentation_selector = false; + + if is_emoji { + let emoji_modifier = analysis_data_sources.emoji_modifier(); + let 
emoji_modifier_base = analysis_data_sources.emoji_modifier_base(); + let emoji_component = analysis_data_sources.emoji_component(); + let emoji_presentation = analysis_data_sources.emoji_presentation(); + + let category = EmojiSegmentationCategory::from_codepoint( + ch as u32, + EmojiFlags::new().with_emoji(is_emoji).with_extra( + emoji_modifier.contains(ch), + emoji_modifier_base.contains(ch), + emoji_presentation.contains(ch), + emoji_component.contains(ch), + info.is_region_indicator(), + ), + ); + + is_emoji_presentation_selector = EmojiSegmentationCategory::Vs16.eq(&category) + || EmojiSegmentationCategory::Vs15.eq(&category); + + emoji_segmentations.push(category); + } let contributes_to_shaping = info.contributes_to_shaping(); if contributes_to_shaping { @@ -277,11 +299,14 @@ fn fill_cluster_in_place( // Finalize cluster metadata let end = *code_unit_offset_in_string as u32; - char_cluster.is_emoji = is_emoji_or_pictograph; char_cluster.map_len = map_len; char_cluster.start = start; char_cluster.end = end; char_cluster.force_normalize = force_normalize; + + if is_emoji { + char_cluster.scanned_emoji_presetation = scan_emoji_presetation(&emoji_segmentations); + } } fn shape_item<'a, B: Brush>( @@ -323,6 +348,7 @@ fn shape_item<'a, B: Brush>( &mut item_infos_iter, &mut code_unit_offset_in_string, char_cluster, + analysis_data_sources, ); let mut current_font = font_selector.select_font(char_cluster, analysis_data_sources); @@ -343,6 +369,7 @@ fn shape_item<'a, B: Brush>( &mut item_infos_iter, &mut code_unit_offset_in_string, char_cluster, + analysis_data_sources, ); if let Some(next_font) = font_selector.select_font(char_cluster, analysis_data_sources) @@ -568,7 +595,7 @@ impl<'a, 'b, B: Brush> FontSelector<'a, 'b, B> { analysis_data_sources: &AnalysisDataSources, ) -> Option { let style_index = cluster.style_index(); - let is_emoji = cluster.is_emoji; + let is_emoji = cluster.scanned_emoji_presetation.is_emoji(); if style_index != self.style_index || is_emoji 
|| self.fonts_id.is_none() { self.style_index = style_index; let style = &self.styles[style_index as usize]; @@ -577,6 +604,7 @@ impl<'a, 'b, B: Brush> FontSelector<'a, 'b, B> { let fonts = self.rcx.stack(style.font_family).unwrap_or(&[]); let fonts = fonts.iter().copied().map(QueryFamily::Id); if is_emoji { + std::dbg!(is_emoji); use core::iter::once; let emoji_family = QueryFamily::Generic(fontique::GenericFamily::Emoji); self.query.set_families(fonts.chain(once(emoji_family))); diff --git a/parley/src/tests/mod.rs b/parley/src/tests/mod.rs index 52b77f872..f6f46b89d 100644 --- a/parley/src/tests/mod.rs +++ b/parley/src/tests/mod.rs @@ -3,4 +3,5 @@ mod test_analysis; mod test_builders; +mod test_emoji_segmenters; mod utils; diff --git a/parley/src/tests/test_analysis.rs b/parley/src/tests/test_analysis.rs index 202e4f11a..1e80d97d4 100644 --- a/parley/src/tests/test_analysis.rs +++ b/parley/src/tests/test_analysis.rs @@ -1180,7 +1180,7 @@ fn test_whitespace_contiguous_interspersed_in_latin_mixed() { } #[test] -fn test_color_emoji_with_non_printing_variation_selector() { +fn test_color_emoji_with_presentation() { verify_analysis("\u{270c}\u{fe0f}", |_| {}) .expect_is_emoji_or_pictograph_list(vec![true, false]) .expect_is_variation_selector_list(vec![false, true]); diff --git a/parley/src/tests/test_emoji_segmenters.rs b/parley/src/tests/test_emoji_segmenters.rs new file mode 100644 index 000000000..1bbe595cb --- /dev/null +++ b/parley/src/tests/test_emoji_segmenters.rs @@ -0,0 +1,725 @@ +use core::char; +use std::vec::Vec; + +use crate::{ + analysis::AnalysisDataSources, + emoji::{ + EmojiFlags, EmojiSegmentationCategory, ScannedEmojiPresetation, scan_emoji_presetation, + }, +}; + +struct TestEntity<'a> { + sequence: &'a [u32], + categories: &'a [EmojiSegmentationCategory], + scanned: ScannedEmojiPresetation, +} + +fn assert_emoji_segmenters_produce_same_result(entity: TestEntity<'_>) { + let analysis = AnalysisDataSources::new(); + let emoji_modifier = 
analysis.emoji_modifier(); + let emoji_modifier_base = analysis.emoji_modifier_base(); + let emoji_component = analysis.emoji_component(); + let emoji_presentation = analysis.emoji_presentation(); + + let result = entity + .sequence + .iter() + .copied() + .map(|cp| { + let props = analysis.properties(char::from_u32(cp).unwrap()); + + let is_emoji = props.is_emoji_or_pictograph(); + let is_emoji_modifier = emoji_modifier.contains32(cp); + let is_emoji_modifier_base = emoji_modifier_base.contains32(cp); + let is_emoji_presentation = emoji_presentation.contains32(cp); + let is_emoji_component = emoji_component.contains32(cp); + let is_regional_indicator = props.is_region_indicator(); + + let emoji_flags = EmojiFlags::new().with_emoji(is_emoji).with_extra( + is_emoji_modifier, + is_emoji_modifier_base, + is_emoji_presentation, + is_emoji_component, + is_regional_indicator, + ); + + EmojiSegmentationCategory::from_codepoint(cp, emoji_flags) + }) + .collect::>(); + + assert_eq!(result, entity.categories); + + assert_eq!(scan_emoji_presetation(&result), entity.scanned); +} + +// Emoji presentation default; Encoded: 😀 +#[test] +fn emoji_presentation_default() { + assert_emoji_segmenters_produce_same_result(TestEntity { + sequence: &[ + 0x1F600, // GRINNING FACE + ], + categories: &[EmojiSegmentationCategory::EmojiEmojiPresentation], + scanned: ScannedEmojiPresetation { + is_emoji: true, + has_vs: false, + }, + }); +} + +// Text presentation default (copyright); Encoded: © +#[test] +fn text_presentation_default() { + assert_emoji_segmenters_produce_same_result(TestEntity { + sequence: &[ + 0x00A9, // COPYRIGHT SIGN + ], + categories: &[EmojiSegmentationCategory::EmojiTextPresentation], + scanned: ScannedEmojiPresetation { + is_emoji: false, + has_vs: false, + }, + }); +} + +// Lone keycap base; Encoded: 1 +#[test] +fn long_keycap_base() { + assert_emoji_segmenters_produce_same_result(TestEntity { + sequence: &[0x0031], // DIGIT ONE + categories: 
&[EmojiSegmentationCategory::KeycapBase], + scanned: ScannedEmojiPresetation { + is_emoji: false, + has_vs: false, + }, + }); +} + +// Keycap base + VS-15 (no term); Encoded: 1︎ +#[test] +fn keycap_base_vs15() { + assert_emoji_segmenters_produce_same_result(TestEntity { + sequence: &[ + 0x0031, // DIGIT ONE + 0xFE0E, // VARIATION SELECTOR-15 + ], + categories: &[ + EmojiSegmentationCategory::KeycapBase, + EmojiSegmentationCategory::Vs15, + ], + scanned: ScannedEmojiPresetation { + is_emoji: false, + has_vs: true, + }, + }); +} + +// Keycap base + VS-16 (no term); Encoded: 1️ +#[test] +fn keycap_base_vs16() { + assert_emoji_segmenters_produce_same_result(TestEntity { + sequence: &[ + 0x0031, // DIGIT ONE + 0xFE0F, // VARIATION SELECTOR-16 + ], + categories: &[ + EmojiSegmentationCategory::KeycapBase, + EmojiSegmentationCategory::Vs16, + ], + scanned: ScannedEmojiPresetation { + is_emoji: true, + has_vs: true, + }, + }); +} + +// Unqualified keycap; Encoded: #⃣ +#[test] +fn unqualified_keycap() { + assert_emoji_segmenters_produce_same_result(TestEntity { + sequence: &[ + 0x0023, // NUMBER SIGN + 0x20E3, // COMBINING ENCLOSING KEYCAP + ], + categories: &[ + EmojiSegmentationCategory::KeycapBase, + EmojiSegmentationCategory::CombiningEnclosingKeycap, + ], + scanned: ScannedEmojiPresetation { + is_emoji: true, + has_vs: false, + }, + }); +} + +// Keycap + VS-15 + term; Encoded: 1︎⃣ +#[test] +fn keycap_vs15_term() { + assert_emoji_segmenters_produce_same_result(TestEntity { + sequence: &[ + 0x0031, // DIGIT ONE + 0xFE0E, // VARIATION SELECTOR-15 + 0x20E3, // COMBINING ENCLOSING KEYCAP + ], + categories: &[ + EmojiSegmentationCategory::KeycapBase, + EmojiSegmentationCategory::Vs15, + EmojiSegmentationCategory::CombiningEnclosingKeycap, + ], + scanned: ScannedEmojiPresetation { + is_emoji: false, + has_vs: true, + }, + }); +} + +// Qualified keycap; Encoded: *️⃣ +#[test] +fn qualified_keycap() { + assert_emoji_segmenters_produce_same_result(TestEntity { + sequence: &[ + 
0x002A, // ASTERISK + 0xFE0F, // VARIATION SELECTOR-16 + 0x20E3, // COMBINING ENCLOSING KEYCAP + ], + categories: &[ + EmojiSegmentationCategory::KeycapBase, + EmojiSegmentationCategory::Vs16, + EmojiSegmentationCategory::CombiningEnclosingKeycap, + ], + scanned: ScannedEmojiPresetation { + is_emoji: true, + has_vs: true, + }, + }); +} + +// Lone emoji modifier (Fitzpatrick); Encoded: 🏻 +#[test] +fn lone_emoji_modifier() { + assert_emoji_segmenters_produce_same_result(TestEntity { + sequence: &[ + 0x1F3FB, // EMOJI MODIFIER FITZPATRICK TYPE-1-2 + ], + categories: &[EmojiSegmentationCategory::EmojiModifier], + scanned: ScannedEmojiPresetation { + is_emoji: true, + has_vs: false, + }, + }); +} + +// Bare modifier base, text default; Encoded: ☝ +#[test] +fn bare_modifier_base_text_default() { + assert_emoji_segmenters_produce_same_result(TestEntity { + sequence: &[ + 0x261D, // WHITE UP POINTING INDEX + ], + categories: &[EmojiSegmentationCategory::EmojiModifierBaseText], + scanned: ScannedEmojiPresetation { + is_emoji: false, + has_vs: false, + }, + }); +} + +// Modifier base (text default) + VS-16; Encoded: ☝️ +#[test] +fn modifier_base_text_default_vs16() { + assert_emoji_segmenters_produce_same_result(TestEntity { + sequence: &[ + 0x261D, // WHITE UP POINTING INDEX + 0xFE0F, // VARIATION SELECTOR-16 + ], + categories: &[ + EmojiSegmentationCategory::EmojiModifierBaseText, + EmojiSegmentationCategory::Vs16, + ], + scanned: ScannedEmojiPresetation { + is_emoji: true, + has_vs: true, + }, + }); +} + +// Modifier base (text default) + skin tone; Encoded: ☝🏻 +#[test] +fn modifier_base_text_default_skin_tone() { + assert_emoji_segmenters_produce_same_result(TestEntity { + sequence: &[ + 0x261D, // WHITE UP POINTING INDEX + 0x1F3FB, // EMOJI MODIFIER FITZPATRICK TYPE-1-2 + ], + categories: &[ + EmojiSegmentationCategory::EmojiModifierBaseText, + EmojiSegmentationCategory::EmojiModifier, + ], + scanned: ScannedEmojiPresetation { + is_emoji: true, + has_vs: false, + }, + 
}); +} + +// Modifier base (emoji default) + skin tone; Encoded: 👦🏻 +#[test] +fn modifier_base_emoji_default_skin_tone() { + assert_emoji_segmenters_produce_same_result(TestEntity { + sequence: &[ + 0x1F466, // BOY + 0x1F3FB, // EMOJI MODIFIER FITZPATRICK TYPE-1-2 + ], + categories: &[ + EmojiSegmentationCategory::EmojiModifierBaseEmoji, + EmojiSegmentationCategory::EmojiModifier, + ], + scanned: ScannedEmojiPresetation { + is_emoji: true, + has_vs: false, + }, + }); +} + +// Lone regional indicator; Encoded: 🇺 +#[test] +fn lone_regional_indicator() { + assert_emoji_segmenters_produce_same_result(TestEntity { + sequence: &[ + 0x1F1FA, // REGIONAL INDICATOR SYMBOL LETTER U + ], + categories: &[EmojiSegmentationCategory::RegionalIndicator], + scanned: ScannedEmojiPresetation { + is_emoji: true, + has_vs: false, + }, + }); +} + +// Flag sequence (US); Encoded: 🇺🇸 +#[test] +fn flag_sequence_us() { + assert_emoji_segmenters_produce_same_result(TestEntity { + sequence: &[ + 0x1F1FA, // REGIONAL INDICATOR SYMBOL LETTER U + 0x1F1F8, // REGIONAL INDICATOR SYMBOL LETTER S + ], + categories: &[ + EmojiSegmentationCategory::RegionalIndicator, + EmojiSegmentationCategory::RegionalIndicator, + ], + scanned: ScannedEmojiPresetation { + is_emoji: true, + has_vs: false, + }, + }); +} + +// Double lone regional indicator + Flag sequence (US); Encoded: 🇺🇺🇸 +// +// FIXME: segmented clusters are incorrect +// ✖️, [[0x1F1FA, 0x1F1FA], [0x1F1F8]] +// ✔️, [[0x1F1FA], [0x1F1FA, 0x1F1F8]] +#[test] +#[ignore] +fn double_lone_regional_indicator_flag_sequence_us() { + assert_emoji_segmenters_produce_same_result(TestEntity { + sequence: &[ + 0x1F1FA, // REGIONAL INDICATOR SYMBOL LETTER U + 0x1F1FA, // REGIONAL INDICATOR SYMBOL LETTER U + 0x1F1F8, // REGIONAL INDICATOR SYMBOL LETTER S + ], + categories: &[ + EmojiSegmentationCategory::RegionalIndicator, + EmojiSegmentationCategory::RegionalIndicator, + EmojiSegmentationCategory::RegionalIndicator, + ], + scanned: ScannedEmojiPresetation { + 
is_emoji: true, + has_vs: false, + }, + }); +} + +// Text-default emoji + VS-15; Encoded: ☺︎ +#[test] +fn text_default_emoji_vs15() { + assert_emoji_segmenters_produce_same_result(TestEntity { + sequence: &[ + 0x263A, // WHITE SMILING FACE + 0xFE0E, // VARIATION SELECTOR-15 + ], + categories: &[ + EmojiSegmentationCategory::EmojiTextPresentation, + EmojiSegmentationCategory::Vs15, + ], + scanned: ScannedEmojiPresetation { + is_emoji: false, + has_vs: true, + }, + }); +} + +// Text-default emoji + VS-16; Encoded: ☺️ +#[test] +fn text_default_emoji_vs16() { + assert_emoji_segmenters_produce_same_result(TestEntity { + sequence: &[ + 0x263A, // WHITE SMILING FACE + 0xFE0F, // VARIATION SELECTOR-16 + ], + categories: &[ + EmojiSegmentationCategory::EmojiTextPresentation, + EmojiSegmentationCategory::Vs16, + ], + scanned: ScannedEmojiPresetation { + is_emoji: true, + has_vs: true, + }, + }); +} + +// Emoji-default emoji + VS-15; Encoded: 😀︎ +#[test] +fn emoji_default_emoji_vs15() { + assert_emoji_segmenters_produce_same_result(TestEntity { + sequence: &[ + 0x1F600, // GRINNING FACE + 0xFE0E, // VARIATION SELECTOR-15 + ], + categories: &[ + EmojiSegmentationCategory::EmojiEmojiPresentation, + EmojiSegmentationCategory::Vs15, + ], + scanned: ScannedEmojiPresetation { + is_emoji: false, + has_vs: true, + }, + }); +} + +// Emoji-default emoji + VS-16; Encoded: 😀️ +#[test] +fn emoji_default_emoji_vs16() { + assert_emoji_segmenters_produce_same_result(TestEntity { + sequence: &[ + 0x1F600, // GRINNING FACE + 0xFE0F, // VARIATION SELECTOR-16 + ], + categories: &[ + EmojiSegmentationCategory::EmojiEmojiPresentation, + EmojiSegmentationCategory::Vs16, + ], + scanned: ScannedEmojiPresetation { + is_emoji: true, + has_vs: true, + }, + }); +} + +// ZWJ family; Encoded: 👨‍👩‍👧 +#[test] +fn zwj_family() { + assert_emoji_segmenters_produce_same_result(TestEntity { + sequence: &[ + 0x1F468, // MAN + 0x200D, // ZERO WIDTH JOINER + 0x1F469, // WOMAN + 0x200D, // ZERO WIDTH JOINER + 
0x1F467, // GIRL + ], + categories: &[ + EmojiSegmentationCategory::EmojiModifierBaseEmoji, + EmojiSegmentationCategory::Zwj, + EmojiSegmentationCategory::EmojiModifierBaseEmoji, + EmojiSegmentationCategory::Zwj, + EmojiSegmentationCategory::EmojiModifierBaseEmoji, + ], + scanned: ScannedEmojiPresetation { + is_emoji: true, + has_vs: false, + }, + }); +} + +// Long ZWJ family (4 members); Encoded: 👨‍👩‍👧‍👦 +#[test] +fn long_zwj_family() { + assert_emoji_segmenters_produce_same_result(TestEntity { + sequence: &[ + 0x1F468, // MAN + 0x200D, // ZERO WIDTH JOINER + 0x1F469, // WOMAN + 0x200D, // ZERO WIDTH JOINER + 0x1F467, // GIRL + 0x200D, // ZERO WIDTH JOINER + 0x1F466, // BOY + ], + categories: &[ + EmojiSegmentationCategory::EmojiModifierBaseEmoji, + EmojiSegmentationCategory::Zwj, + EmojiSegmentationCategory::EmojiModifierBaseEmoji, + EmojiSegmentationCategory::Zwj, + EmojiSegmentationCategory::EmojiModifierBaseEmoji, + EmojiSegmentationCategory::Zwj, + EmojiSegmentationCategory::EmojiModifierBaseEmoji, + ], + scanned: ScannedEmojiPresetation { + is_emoji: true, + has_vs: false, + }, + }); +} + +// ZWJ couple; Encoded: 👨‍❤‍👨 +#[test] +fn zwj_couple() { + assert_emoji_segmenters_produce_same_result(TestEntity { + sequence: &[ + 0x1F468, // MAN + 0x200D, // ZERO WIDTH JOINER + 0x2764, // HEAVY BLACK HEART + 0x200D, // ZERO WIDTH JOINER + 0x1F468, // MAN + ], + categories: &[ + EmojiSegmentationCategory::EmojiModifierBaseEmoji, + EmojiSegmentationCategory::Zwj, + EmojiSegmentationCategory::EmojiTextPresentation, + EmojiSegmentationCategory::Zwj, + EmojiSegmentationCategory::EmojiModifierBaseEmoji, + ], + scanned: ScannedEmojiPresetation { + is_emoji: true, + has_vs: false, + }, + }); +} + +// ZWJ with VS-16 element; Encoded: 👨️‍👩 +#[test] +fn zwj_with_vs16_element() { + assert_emoji_segmenters_produce_same_result(TestEntity { + sequence: &[ + 0x1F468, // MAN + 0xFE0F, // VARIATION SELECTOR-16 + 0x200D, // ZERO WIDTH JOINER + 0x1F469, // WOMAN + ], + categories: &[ + 
EmojiSegmentationCategory::EmojiModifierBaseEmoji, + EmojiSegmentationCategory::Vs16, + EmojiSegmentationCategory::Zwj, + EmojiSegmentationCategory::EmojiModifierBaseEmoji, + ], + scanned: ScannedEmojiPresetation { + is_emoji: true, + has_vs: false, + }, + }); +} + +// ZWJ with VS-16 on both elements; Encoded: 👨️‍👩️ +#[test] +fn zwj_with_vs16_on_both_elements() { + assert_emoji_segmenters_produce_same_result(TestEntity { + sequence: &[ + 0x1F468, // MAN + 0xFE0F, // VARIATION SELECTOR-16 + 0x200D, // ZERO WIDTH JOINER + 0x1F469, // WOMAN + 0xFE0F, // VARIATION SELECTOR-16 + ], + categories: &[ + EmojiSegmentationCategory::EmojiModifierBaseEmoji, + EmojiSegmentationCategory::Vs16, + EmojiSegmentationCategory::Zwj, + EmojiSegmentationCategory::EmojiModifierBaseEmoji, + EmojiSegmentationCategory::Vs16, + ], + scanned: ScannedEmojiPresetation { + is_emoji: true, + has_vs: false, + }, + }); +} + +// ZWJ after modifier sequence; Encoded: 👦🏻‍💻 +#[test] +fn zwj_after_modifier_sequence() { + assert_emoji_segmenters_produce_same_result(TestEntity { + sequence: &[ + 0x1F466, // BOY + 0x1F3FB, // EMOJI MODIFIER FITZPATRICK TYPE-1-2 + 0x200D, // ZERO WIDTH JOINER + 0x1F4BB, // PERSONAL COMPUTER + ], + categories: &[ + EmojiSegmentationCategory::EmojiModifierBaseEmoji, + EmojiSegmentationCategory::EmojiModifier, + EmojiSegmentationCategory::Zwj, + EmojiSegmentationCategory::EmojiEmojiPresentation, + ], + scanned: ScannedEmojiPresetation { + is_emoji: true, + has_vs: false, + }, + }); +} + +// ZWJ technologist with skin tone; Encoded: 👨🏻‍💻 +#[test] +fn zwj_technologist_with_skin_tone() { + assert_emoji_segmenters_produce_same_result(TestEntity { + sequence: &[ + 0x1F468, // MAN + 0x1F3FB, // EMOJI MODIFIER FITZPATRICK TYPE-1-2 + 0x200D, // ZERO WIDTH JOINER + 0x1F4BB, // PERSONAL COMPUTER + ], + categories: &[ + EmojiSegmentationCategory::EmojiModifierBaseEmoji, + EmojiSegmentationCategory::EmojiModifier, + EmojiSegmentationCategory::Zwj, + 
EmojiSegmentationCategory::EmojiEmojiPresentation, + ], + scanned: ScannedEmojiPresetation { + is_emoji: true, + has_vs: false, + }, + }); +} + +// VS-16 enables ZWJ continuation; Encoded: ☺️‍👩 +#[test] +fn vs16_enables_zwj_continuation() { + assert_emoji_segmenters_produce_same_result(TestEntity { + sequence: &[ + 0x263A, // WHITE SMILING FACE + 0xFE0F, // VARIATION SELECTOR-16 + 0x200D, // ZERO WIDTH JOINER + 0x1F469, // WOMAN + ], + categories: &[ + EmojiSegmentationCategory::EmojiTextPresentation, + EmojiSegmentationCategory::Vs16, + EmojiSegmentationCategory::Zwj, + EmojiSegmentationCategory::EmojiModifierBaseEmoji, + ], + scanned: ScannedEmojiPresetation { + is_emoji: true, + has_vs: false, + }, + }); +} + +// Tag sequence (England); Encoded: 🏴󠁧󠁢󠁥󠁮󠁧󠁿 +#[test] +fn tag_sequence_england() { + assert_emoji_segmenters_produce_same_result(TestEntity { + sequence: &[ + 0x1F3F4, // WAVING BLACK FLAG + 0xE0067, // TAG LATIN SMALL LETTER G + 0xE0062, // TAG LATIN SMALL LETTER B + 0xE0065, // TAG LATIN SMALL LETTER E + 0xE006E, // TAG LATIN SMALL LETTER N + 0xE0067, // TAG LATIN SMALL LETTER G + 0xE007F, // CANCEL TAG + ], + categories: &[ + EmojiSegmentationCategory::TagBase, + EmojiSegmentationCategory::TagSequence, + EmojiSegmentationCategory::TagSequence, + EmojiSegmentationCategory::TagSequence, + EmojiSegmentationCategory::TagSequence, + EmojiSegmentationCategory::TagSequence, + EmojiSegmentationCategory::TagTerm, + ], + scanned: ScannedEmojiPresetation { + is_emoji: true, + has_vs: false, + }, + }); +} + +// TAG_BASE as ZWJ element; Encoded: 🏴‍😀" +#[test] +fn tag_base_as_zwj_element() { + assert_emoji_segmenters_produce_same_result(TestEntity { + sequence: &[ + 0x1F3F4, // WAVING BLACK FLAG + 0x200D, // ZERO WIDTH JOINER + 0x1F600, // GRINNING FACE + ], + categories: &[ + EmojiSegmentationCategory::TagBase, + EmojiSegmentationCategory::Zwj, + EmojiSegmentationCategory::EmojiEmojiPresentation, + ], + scanned: ScannedEmojiPresetation { + is_emoji: true, + has_vs: 
false, + }, + }); +} + +// TAG_BASE + VS-16 + ZWJ; Encoded: 🏴️‍😀", +#[test] +fn tag_base_vs16_as_zwj() { + assert_emoji_segmenters_produce_same_result(TestEntity { + sequence: &[ + 0x1F3F4, // WAVING BLACK FLAG + 0xFE0F, // VARIATION SELECTOR-16 + 0x200D, // ZERO WIDTH JOINER + 0x1F600, // GRINNING FACE + ], + categories: &[ + EmojiSegmentationCategory::TagBase, + EmojiSegmentationCategory::Vs16, + EmojiSegmentationCategory::Zwj, + EmojiSegmentationCategory::EmojiEmojiPresentation, + ], + scanned: ScannedEmojiPresetation { + is_emoji: true, + has_vs: false, + }, + }); +} + +// TAG_BASE + VS-15; Encoded: 🏴︎ +#[test] +fn tag_base_vs15() { + assert_emoji_segmenters_produce_same_result(TestEntity { + sequence: &[ + 0x1F3F4, // WAVING BLACK FLAG + 0xFE0E, // VARIATION SELECTOR-15 + ], + categories: &[ + EmojiSegmentationCategory::TagBase, + EmojiSegmentationCategory::Vs15, + ], + scanned: ScannedEmojiPresetation { + is_emoji: false, + has_vs: true, + }, + }); +} + +// TAG_BASE + VS-16; Encoded: 🏴️ +#[test] +fn tag_base_vs16() { + assert_emoji_segmenters_produce_same_result(TestEntity { + sequence: &[ + 0x1F3F4, // WAVING BLACK FLAG + 0xFE0F, // VARIATION SELECTOR-16 + ], + categories: &[ + EmojiSegmentationCategory::TagBase, + EmojiSegmentationCategory::Vs16, + ], + scanned: ScannedEmojiPresetation { + is_emoji: true, + has_vs: true, + }, + }); +} diff --git a/parley_tests/tests/draw.rs b/parley_tests/tests/draw.rs index afbf9496a..da49dc45c 100644 --- a/parley_tests/tests/draw.rs +++ b/parley_tests/tests/draw.rs @@ -223,7 +223,7 @@ fn draw_colr_emoji_with_presentation_style_without_setting_default_font() { let collection = &mut env.font_context().collection; collection.load_system_fonts(); - let text = "\u{270c}\u{fe0f}\u{2705}\u{270c}\u{fe0e}\u{fe0e}\u{fe0f}"; + let text = "\u{270c}\u{fe0f}\u{2705}\u{270c}\u{fe0f}"; test_with_configs(&mut env, |env| { let mut builder = env.ranged_builder(text); From 059c707238d98ce0be8f2a1358e7ff73ccdfa762 Mon Sep 17 00:00:00 2001 
From: Fangdun Tsai Date: Sun, 10 May 2026 00:10:55 +0800 Subject: [PATCH 10/34] add copyright header --- parley/src/emoji/mod.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/parley/src/emoji/mod.rs b/parley/src/emoji/mod.rs index 2e0b094bb..3bf366ae0 100644 --- a/parley/src/emoji/mod.rs +++ b/parley/src/emoji/mod.rs @@ -1,3 +1,6 @@ +// Copyright 2026 the Parley Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + //! The Core algorithm is based on [Emoji Segmenter]'s Ragel grammar. //! //! Follow the [UTS51](Unicode Technical Standard #51). From e6a80867feed084446efcbbae238285934d0e78f Mon Sep 17 00:00:00 2001 From: Fangdun Tsai Date: Sun, 10 May 2026 00:13:37 +0800 Subject: [PATCH 11/34] add copyright header --- parley/src/tests/test_emoji_segmenters.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/parley/src/tests/test_emoji_segmenters.rs b/parley/src/tests/test_emoji_segmenters.rs index 1bbe595cb..4d4d06bcc 100644 --- a/parley/src/tests/test_emoji_segmenters.rs +++ b/parley/src/tests/test_emoji_segmenters.rs @@ -1,3 +1,6 @@ +// Copyright 2025 the Parley Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + use core::char; use std::vec::Vec; From 98a672d6bd3bd57874d5bcc764f8b14655128630 Mon Sep 17 00:00:00 2001 From: Fangdun Tsai Date: Sun, 10 May 2026 00:18:45 +0800 Subject: [PATCH 12/34] fix typo --- parley/src/analysis/cluster.rs | 6 +- parley/src/emoji/mod.rs | 30 +++++----- parley/src/shape/mod.rs | 6 +- parley/src/tests/test_emoji_segmenters.rs | 72 +++++++++++------------ 4 files changed, 57 insertions(+), 57 deletions(-) diff --git a/parley/src/analysis/cluster.rs b/parley/src/analysis/cluster.rs index 4b4ae1fec..a8a9d4686 100644 --- a/parley/src/analysis/cluster.rs +++ b/parley/src/analysis/cluster.rs @@ -4,7 +4,7 @@ use alloc::vec::Vec; use icu_normalizer::properties::Decomposed; -use crate::{analysis::AnalysisDataSources, emoji::ScannedEmojiPresetation}; +use crate::{analysis::AnalysisDataSources, emoji::ScannedEmojiPresentation}; 
/// The maximum number of characters in a single cluster. const MAX_CLUSTER_SIZE: usize = 32; @@ -16,7 +16,7 @@ pub(crate) struct CharCluster { pub start: u32, pub end: u32, pub force_normalize: bool, - pub scanned_emoji_presetation: ScannedEmojiPresetation, + pub scanned_emoji_presentation: ScannedEmojiPresentation, comp: Form, decomp: Form, form: FormKind, @@ -103,7 +103,7 @@ impl CharCluster { self.decomp.clear(); self.form = FormKind::Original; self.best_ratio = 0.; - self.scanned_emoji_presetation.clear(); + self.scanned_emoji_presentation.clear(); } #[inline(always)] diff --git a/parley/src/emoji/mod.rs b/parley/src/emoji/mod.rs index 3bf366ae0..e901f90bd 100644 --- a/parley/src/emoji/mod.rs +++ b/parley/src/emoji/mod.rs @@ -161,12 +161,12 @@ impl EmojiSegmentationCategory { } #[derive(Clone, Copy, Default, PartialEq, Eq, Debug)] -pub(crate) struct ScannedEmojiPresetation { +pub(crate) struct ScannedEmojiPresentation { pub is_emoji: bool, pub has_vs: bool, } -impl ScannedEmojiPresetation { +impl ScannedEmojiPresentation { pub(crate) fn is_emoji(&self) -> bool { self.is_emoji } @@ -177,13 +177,13 @@ impl ScannedEmojiPresetation { } } -pub(crate) const fn scan_emoji_presetation( +pub(crate) const fn scan_emoji_presentation( categories: &[EmojiSegmentationCategory], -) -> ScannedEmojiPresetation { +) -> ScannedEmojiPresentation { let len = categories.len(); if len == 0 { - return ScannedEmojiPresetation { + return ScannedEmojiPresentation { is_emoji: false, has_vs: false, }; @@ -199,7 +199,7 @@ pub(crate) const fn scan_emoji_presetation( is_any_emoji && len >= 2 && EmojiSegmentationCategory::Vs15.eq(categories[1]); if is_text_emoji_presentation_sequence && len == 2 || is_text_emoji_keycap_sequence(categories) { - return ScannedEmojiPresetation { + return ScannedEmojiPresentation { is_emoji: false, has_vs: true, }; @@ -207,14 +207,14 @@ pub(crate) const fn scan_emoji_presetation( // emoji_run if is_emoji_presentation && len == 1 { - return ScannedEmojiPresetation 
{ + return ScannedEmojiPresentation { is_emoji: true, has_vs: false, }; } if is_unqualified_keycap_sequence(categories) { - return ScannedEmojiPresetation { + return ScannedEmojiPresentation { is_emoji: true, has_vs: false, }; @@ -224,14 +224,14 @@ pub(crate) const fn scan_emoji_presetation( && len == 2 && EmojiSegmentationCategory::CombiningEnclosingCircleBackslash.eq(categories[1]); if is_emoji_combining_enclosing_circle_backslash_sequence { - return ScannedEmojiPresetation { + return ScannedEmojiPresentation { is_emoji: true, has_vs: false, }; } if is_emoji_flag_sequence(categories) { - return ScannedEmojiPresetation { + return ScannedEmojiPresentation { is_emoji: true, has_vs: false, }; @@ -239,7 +239,7 @@ pub(crate) const fn scan_emoji_presetation( // TAG_BASE TAG_SEQUENCE+ TAG_TERM; if is_emoji_tag_sequence(categories) { - return ScannedEmojiPresetation { + return ScannedEmojiPresentation { is_emoji: true, has_vs: false, }; @@ -249,7 +249,7 @@ pub(crate) const fn scan_emoji_presetation( let is_emoji_presentation_sequence = is_any_emoji && len >= 2 && EmojiSegmentationCategory::Vs16.eq(categories[1]); if (is_emoji_presentation_sequence && len == 2) || is_emoji_keycap_sequence(categories) { - return ScannedEmojiPresetation { + return ScannedEmojiPresentation { is_emoji: true, has_vs: true, }; @@ -259,7 +259,7 @@ pub(crate) const fn scan_emoji_presetation( && len >= 2 && EmojiSegmentationCategory::EmojiModifier.eq(categories[1]); if is_emoji_modifier_sequence && len == 2 { - return ScannedEmojiPresetation { + return ScannedEmojiPresentation { is_emoji: true, has_vs: false, }; @@ -275,7 +275,7 @@ pub(crate) const fn scan_emoji_presetation( // fast path if cursor == len { - return ScannedEmojiPresetation { + return ScannedEmojiPresentation { is_emoji: false, has_vs: is_text_emoji_presentation_sequence, }; @@ -312,7 +312,7 @@ pub(crate) const fn scan_emoji_presetation( } } - ScannedEmojiPresetation { + ScannedEmojiPresentation { is_emoji: cursor == len || 
is_emoji_presentation_sequence || is_emoji_modifier_sequence, has_vs: false, } diff --git a/parley/src/shape/mod.rs b/parley/src/shape/mod.rs index 1b41f9a47..565686210 100644 --- a/parley/src/shape/mod.rs +++ b/parley/src/shape/mod.rs @@ -14,7 +14,7 @@ use super::style::{Brush, FontFeature, FontVariation}; use crate::analysis::cluster::{Char, CharCluster, Status}; use crate::analysis::{AnalysisDataSources, CharInfo}; use crate::convert::script_to_harfrust; -use crate::emoji::{EmojiFlags, EmojiSegmentationCategory, scan_emoji_presetation}; +use crate::emoji::{EmojiFlags, EmojiSegmentationCategory, scan_emoji_presentation}; use crate::inline_box::InlineBox; use crate::lru_cache::LruCache; use crate::util::nearly_eq; @@ -305,7 +305,7 @@ fn fill_cluster_in_place( char_cluster.force_normalize = force_normalize; if is_emoji { - char_cluster.scanned_emoji_presetation = scan_emoji_presetation(&emoji_segmentations); + char_cluster.scanned_emoji_presentation = scan_emoji_presentation(&emoji_segmentations); } } @@ -595,7 +595,7 @@ impl<'a, 'b, B: Brush> FontSelector<'a, 'b, B> { analysis_data_sources: &AnalysisDataSources, ) -> Option { let style_index = cluster.style_index(); - let is_emoji = cluster.scanned_emoji_presetation.is_emoji(); + let is_emoji = cluster.scanned_emoji_presentation.is_emoji(); if style_index != self.style_index || is_emoji || self.fonts_id.is_none() { self.style_index = style_index; let style = &self.styles[style_index as usize]; diff --git a/parley/src/tests/test_emoji_segmenters.rs b/parley/src/tests/test_emoji_segmenters.rs index 4d4d06bcc..4f971d0cd 100644 --- a/parley/src/tests/test_emoji_segmenters.rs +++ b/parley/src/tests/test_emoji_segmenters.rs @@ -7,14 +7,14 @@ use std::vec::Vec; use crate::{ analysis::AnalysisDataSources, emoji::{ - EmojiFlags, EmojiSegmentationCategory, ScannedEmojiPresetation, scan_emoji_presetation, + EmojiFlags, EmojiSegmentationCategory, ScannedEmojiPresentation, scan_emoji_presentation, }, }; struct TestEntity<'a> { 
sequence: &'a [u32], categories: &'a [EmojiSegmentationCategory], - scanned: ScannedEmojiPresetation, + scanned: ScannedEmojiPresentation, } fn assert_emoji_segmenters_produce_same_result(entity: TestEntity<'_>) { @@ -52,7 +52,7 @@ fn assert_emoji_segmenters_produce_same_result(entity: TestEntity<'_>) { assert_eq!(result, entity.categories); - assert_eq!(scan_emoji_presetation(&result), entity.scanned); + assert_eq!(scan_emoji_presentation(&result), entity.scanned); } // Emoji presentation default; Encoded: 😀 @@ -63,7 +63,7 @@ fn emoji_presentation_default() { 0x1F600, // GRINNING FACE ], categories: &[EmojiSegmentationCategory::EmojiEmojiPresentation], - scanned: ScannedEmojiPresetation { + scanned: ScannedEmojiPresentation { is_emoji: true, has_vs: false, }, @@ -78,7 +78,7 @@ fn text_presentation_default() { 0x00A9, // COPYRIGHT SIGN ], categories: &[EmojiSegmentationCategory::EmojiTextPresentation], - scanned: ScannedEmojiPresetation { + scanned: ScannedEmojiPresentation { is_emoji: false, has_vs: false, }, @@ -91,7 +91,7 @@ fn long_keycap_base() { assert_emoji_segmenters_produce_same_result(TestEntity { sequence: &[0x0031], // DIGIT ONE categories: &[EmojiSegmentationCategory::KeycapBase], - scanned: ScannedEmojiPresetation { + scanned: ScannedEmojiPresentation { is_emoji: false, has_vs: false, }, @@ -110,7 +110,7 @@ fn keycap_base_vs15() { EmojiSegmentationCategory::KeycapBase, EmojiSegmentationCategory::Vs15, ], - scanned: ScannedEmojiPresetation { + scanned: ScannedEmojiPresentation { is_emoji: false, has_vs: true, }, @@ -129,7 +129,7 @@ fn keycap_base_vs16() { EmojiSegmentationCategory::KeycapBase, EmojiSegmentationCategory::Vs16, ], - scanned: ScannedEmojiPresetation { + scanned: ScannedEmojiPresentation { is_emoji: true, has_vs: true, }, @@ -148,7 +148,7 @@ fn unqualified_keycap() { EmojiSegmentationCategory::KeycapBase, EmojiSegmentationCategory::CombiningEnclosingKeycap, ], - scanned: ScannedEmojiPresetation { + scanned: ScannedEmojiPresentation { 
is_emoji: true, has_vs: false, }, @@ -169,7 +169,7 @@ fn keycap_vs15_term() { EmojiSegmentationCategory::Vs15, EmojiSegmentationCategory::CombiningEnclosingKeycap, ], - scanned: ScannedEmojiPresetation { + scanned: ScannedEmojiPresentation { is_emoji: false, has_vs: true, }, @@ -190,7 +190,7 @@ fn qualified_keycap() { EmojiSegmentationCategory::Vs16, EmojiSegmentationCategory::CombiningEnclosingKeycap, ], - scanned: ScannedEmojiPresetation { + scanned: ScannedEmojiPresentation { is_emoji: true, has_vs: true, }, @@ -205,7 +205,7 @@ fn lone_emoji_modifier() { 0x1F3FB, // EMOJI MODIFIER FITZPATRICK TYPE-1-2 ], categories: &[EmojiSegmentationCategory::EmojiModifier], - scanned: ScannedEmojiPresetation { + scanned: ScannedEmojiPresentation { is_emoji: true, has_vs: false, }, @@ -220,7 +220,7 @@ fn bare_modifier_base_text_default() { 0x261D, // WHITE UP POINTING INDEX ], categories: &[EmojiSegmentationCategory::EmojiModifierBaseText], - scanned: ScannedEmojiPresetation { + scanned: ScannedEmojiPresentation { is_emoji: false, has_vs: false, }, @@ -239,7 +239,7 @@ fn modifier_base_text_default_vs16() { EmojiSegmentationCategory::EmojiModifierBaseText, EmojiSegmentationCategory::Vs16, ], - scanned: ScannedEmojiPresetation { + scanned: ScannedEmojiPresentation { is_emoji: true, has_vs: true, }, @@ -258,7 +258,7 @@ fn modifier_base_text_default_skin_tone() { EmojiSegmentationCategory::EmojiModifierBaseText, EmojiSegmentationCategory::EmojiModifier, ], - scanned: ScannedEmojiPresetation { + scanned: ScannedEmojiPresentation { is_emoji: true, has_vs: false, }, @@ -277,7 +277,7 @@ fn modifier_base_emoji_default_skin_tone() { EmojiSegmentationCategory::EmojiModifierBaseEmoji, EmojiSegmentationCategory::EmojiModifier, ], - scanned: ScannedEmojiPresetation { + scanned: ScannedEmojiPresentation { is_emoji: true, has_vs: false, }, @@ -292,7 +292,7 @@ fn lone_regional_indicator() { 0x1F1FA, // REGIONAL INDICATOR SYMBOL LETTER U ], categories: 
&[EmojiSegmentationCategory::RegionalIndicator], - scanned: ScannedEmojiPresetation { + scanned: ScannedEmojiPresentation { is_emoji: true, has_vs: false, }, @@ -311,7 +311,7 @@ fn flag_sequence_us() { EmojiSegmentationCategory::RegionalIndicator, EmojiSegmentationCategory::RegionalIndicator, ], - scanned: ScannedEmojiPresetation { + scanned: ScannedEmojiPresentation { is_emoji: true, has_vs: false, }, @@ -337,7 +337,7 @@ fn double_lone_regional_indicator_flag_sequence_us() { EmojiSegmentationCategory::RegionalIndicator, EmojiSegmentationCategory::RegionalIndicator, ], - scanned: ScannedEmojiPresetation { + scanned: ScannedEmojiPresentation { is_emoji: true, has_vs: false, }, @@ -356,7 +356,7 @@ fn text_default_emoji_vs15() { EmojiSegmentationCategory::EmojiTextPresentation, EmojiSegmentationCategory::Vs15, ], - scanned: ScannedEmojiPresetation { + scanned: ScannedEmojiPresentation { is_emoji: false, has_vs: true, }, @@ -375,7 +375,7 @@ fn text_default_emoji_vs16() { EmojiSegmentationCategory::EmojiTextPresentation, EmojiSegmentationCategory::Vs16, ], - scanned: ScannedEmojiPresetation { + scanned: ScannedEmojiPresentation { is_emoji: true, has_vs: true, }, @@ -394,7 +394,7 @@ fn emoji_default_emoji_vs15() { EmojiSegmentationCategory::EmojiEmojiPresentation, EmojiSegmentationCategory::Vs15, ], - scanned: ScannedEmojiPresetation { + scanned: ScannedEmojiPresentation { is_emoji: false, has_vs: true, }, @@ -413,7 +413,7 @@ fn emoji_default_emoji_vs16() { EmojiSegmentationCategory::EmojiEmojiPresentation, EmojiSegmentationCategory::Vs16, ], - scanned: ScannedEmojiPresetation { + scanned: ScannedEmojiPresentation { is_emoji: true, has_vs: true, }, @@ -438,7 +438,7 @@ fn zwj_family() { EmojiSegmentationCategory::Zwj, EmojiSegmentationCategory::EmojiModifierBaseEmoji, ], - scanned: ScannedEmojiPresetation { + scanned: ScannedEmojiPresentation { is_emoji: true, has_vs: false, }, @@ -467,7 +467,7 @@ fn long_zwj_family() { EmojiSegmentationCategory::Zwj, 
EmojiSegmentationCategory::EmojiModifierBaseEmoji, ], - scanned: ScannedEmojiPresetation { + scanned: ScannedEmojiPresentation { is_emoji: true, has_vs: false, }, @@ -492,7 +492,7 @@ fn zwj_couple() { EmojiSegmentationCategory::Zwj, EmojiSegmentationCategory::EmojiModifierBaseEmoji, ], - scanned: ScannedEmojiPresetation { + scanned: ScannedEmojiPresentation { is_emoji: true, has_vs: false, }, @@ -515,7 +515,7 @@ fn zwj_with_vs16_element() { EmojiSegmentationCategory::Zwj, EmojiSegmentationCategory::EmojiModifierBaseEmoji, ], - scanned: ScannedEmojiPresetation { + scanned: ScannedEmojiPresentation { is_emoji: true, has_vs: false, }, @@ -540,7 +540,7 @@ fn zwj_with_vs16_on_both_elements() { EmojiSegmentationCategory::EmojiModifierBaseEmoji, EmojiSegmentationCategory::Vs16, ], - scanned: ScannedEmojiPresetation { + scanned: ScannedEmojiPresentation { is_emoji: true, has_vs: false, }, @@ -563,7 +563,7 @@ fn zwj_after_modifier_sequence() { EmojiSegmentationCategory::Zwj, EmojiSegmentationCategory::EmojiEmojiPresentation, ], - scanned: ScannedEmojiPresetation { + scanned: ScannedEmojiPresentation { is_emoji: true, has_vs: false, }, @@ -586,7 +586,7 @@ fn zwj_technologist_with_skin_tone() { EmojiSegmentationCategory::Zwj, EmojiSegmentationCategory::EmojiEmojiPresentation, ], - scanned: ScannedEmojiPresetation { + scanned: ScannedEmojiPresentation { is_emoji: true, has_vs: false, }, @@ -609,7 +609,7 @@ fn vs16_enables_zwj_continuation() { EmojiSegmentationCategory::Zwj, EmojiSegmentationCategory::EmojiModifierBaseEmoji, ], - scanned: ScannedEmojiPresetation { + scanned: ScannedEmojiPresentation { is_emoji: true, has_vs: false, }, @@ -638,7 +638,7 @@ fn tag_sequence_england() { EmojiSegmentationCategory::TagSequence, EmojiSegmentationCategory::TagTerm, ], - scanned: ScannedEmojiPresetation { + scanned: ScannedEmojiPresentation { is_emoji: true, has_vs: false, }, @@ -659,7 +659,7 @@ fn tag_base_as_zwj_element() { EmojiSegmentationCategory::Zwj, 
EmojiSegmentationCategory::EmojiEmojiPresentation, ], - scanned: ScannedEmojiPresetation { + scanned: ScannedEmojiPresentation { is_emoji: true, has_vs: false, }, @@ -682,7 +682,7 @@ fn tag_base_vs16_as_zwj() { EmojiSegmentationCategory::Zwj, EmojiSegmentationCategory::EmojiEmojiPresentation, ], - scanned: ScannedEmojiPresetation { + scanned: ScannedEmojiPresentation { is_emoji: true, has_vs: false, }, @@ -701,7 +701,7 @@ fn tag_base_vs15() { EmojiSegmentationCategory::TagBase, EmojiSegmentationCategory::Vs15, ], - scanned: ScannedEmojiPresetation { + scanned: ScannedEmojiPresentation { is_emoji: false, has_vs: true, }, @@ -720,7 +720,7 @@ fn tag_base_vs16() { EmojiSegmentationCategory::TagBase, EmojiSegmentationCategory::Vs16, ], - scanned: ScannedEmojiPresetation { + scanned: ScannedEmojiPresentation { is_emoji: true, has_vs: true, }, From ffe321eb7741f9858dc473b750aa8190e2d5581c Mon Sep 17 00:00:00 2001 From: Fangdun Tsai Date: Sun, 10 May 2026 00:23:05 +0800 Subject: [PATCH 13/34] fix doc link --- parley/src/emoji/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parley/src/emoji/mod.rs b/parley/src/emoji/mod.rs index e901f90bd..b2c3e4602 100644 --- a/parley/src/emoji/mod.rs +++ b/parley/src/emoji/mod.rs @@ -6,7 +6,7 @@ //! Follow the [UTS51](Unicode Technical Standard #51). //! //! [Emoji Segmenter]: -//! [TR51]: +//! 
[UTS51]: #[derive(Clone, Copy, Default, Debug)] pub(crate) struct EmojiFlags(u32); From 2760eba367e47bb3a946b087cdb3035809fdf712 Mon Sep 17 00:00:00 2001 From: Fangdun Tsai Date: Sun, 10 May 2026 00:27:56 +0800 Subject: [PATCH 14/34] remove debug --- parley/src/shape/mod.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/parley/src/shape/mod.rs b/parley/src/shape/mod.rs index 565686210..c19d21912 100644 --- a/parley/src/shape/mod.rs +++ b/parley/src/shape/mod.rs @@ -604,7 +604,6 @@ impl<'a, 'b, B: Brush> FontSelector<'a, 'b, B> { let fonts = self.rcx.stack(style.font_family).unwrap_or(&[]); let fonts = fonts.iter().copied().map(QueryFamily::Id); if is_emoji { - std::dbg!(is_emoji); use core::iter::once; let emoji_family = QueryFamily::Generic(fontique::GenericFamily::Emoji); self.query.set_families(fonts.chain(once(emoji_family))); From b5749184c5cedd2b48cc68df4fc0aca37758a41f Mon Sep 17 00:00:00 2001 From: Fangdun Tsai Date: Sun, 10 May 2026 00:33:20 +0800 Subject: [PATCH 15/34] fix doc --- parley/src/emoji/mod.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/parley/src/emoji/mod.rs b/parley/src/emoji/mod.rs index b2c3e4602..0d7e922bb 100644 --- a/parley/src/emoji/mod.rs +++ b/parley/src/emoji/mod.rs @@ -321,14 +321,14 @@ pub(crate) const fn scan_emoji_presentation( /// Extracts the emoji category flags from the given category. 
/// /// `is_any_emoji`: -/// EmojiTextPresentation | EmojiEmojiPresentation | KeycapBase | -/// EmojiModifierBaseText | EmojiModifierBaseEmoji | TagBase | Emoji +/// `EmojiTextPresentation` | `EmojiEmojiPresentation` | `KeycapBase` | +/// `EmojiModifierBaseText` | `EmojiModifierBaseEmoji` | `TagBase` | `Emoji` /// -/// `is_emoji_modifier_base`: EmojiModifierBaseText | EmojiModifierBaseEmoji +/// `is_emoji_modifier_base`: `EmojiModifierBaseText` | `EmojiModifierBaseEmoji` /// /// `is_emoji_presentation`: -/// EmojiEmojiPresentation | TagBase | EmojiModifierBaseEmoji | -/// EmojiModifier | RegionalIndicator +/// `EmojiEmojiPresentation` | `TagBase` | `EmojiModifierBaseEmoji` | +/// `EmojiModifier` | `RegionalIndicator` /// /// Returns `(is_any_emoji, is_emoji_modifier_base, is_emoji_presentation)` #[inline(always)] From f9cd2d914d46c9b46159708f212ed89eec1d370d Mon Sep 17 00:00:00 2001 From: Fangdun Tsai Date: Sun, 10 May 2026 00:35:48 +0800 Subject: [PATCH 16/34] fix doc --- parley/src/emoji/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parley/src/emoji/mod.rs b/parley/src/emoji/mod.rs index 0d7e922bb..fd724e83b 100644 --- a/parley/src/emoji/mod.rs +++ b/parley/src/emoji/mod.rs @@ -1,7 +1,7 @@ // Copyright 2026 the Parley Authors // SPDX-License-Identifier: Apache-2.0 OR MIT -//! The Core algorithm is based on [Emoji Segmenter]'s Ragel grammar. +//! The implementation is based on [Emoji Segmenter]'s Ragel grammar. //! //! Follow the [UTS51](Unicode Technical Standard #51). //! 
From f651df4031fcf3e97aca59c9e850bb85e00b4194 Mon Sep 17 00:00:00 2001 From: Fangdun Tsai Date: Sun, 10 May 2026 00:39:49 +0800 Subject: [PATCH 17/34] fix clippy --- parley/src/emoji/mod.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/parley/src/emoji/mod.rs b/parley/src/emoji/mod.rs index fd724e83b..7dd2425cb 100644 --- a/parley/src/emoji/mod.rs +++ b/parley/src/emoji/mod.rs @@ -56,33 +56,33 @@ impl EmojiFlags { } #[inline(always)] - pub(crate) const fn is_emoji(&self) -> bool { + pub(crate) const fn is_emoji(self) -> bool { self.0 & Self::EMOJI_MASK != 0 } #[inline(always)] - pub(crate) const fn is_emoji_modifier(&self) -> bool { + pub(crate) const fn is_emoji_modifier(self) -> bool { self.0 & Self::EMOJI_MODIFIER_MASK != 0 } #[inline(always)] - pub(crate) const fn is_emoji_modifier_base(&self) -> bool { + pub(crate) const fn is_emoji_modifier_base(self) -> bool { self.0 & Self::EMOJI_MODIFIER_BASE_MASK != 0 } #[inline(always)] - pub(crate) const fn is_emoji_presentation(&self) -> bool { + pub(crate) const fn is_emoji_presentation(self) -> bool { self.0 & Self::EMOJI_PRESENTATION_MASK != 0 } #[allow(unused)] #[inline(always)] - pub(crate) const fn is_emoji_component(&self) -> bool { + pub(crate) const fn is_emoji_component(self) -> bool { self.0 & Self::EMOJI_COMPONENT_MASK != 0 } #[inline(always)] - pub(crate) const fn is_regional_indicator(&self) -> bool { + pub(crate) const fn is_regional_indicator(self) -> bool { self.0 & Self::REGIONAL_INDICATOR_MASK != 0 } } @@ -167,7 +167,7 @@ pub(crate) struct ScannedEmojiPresentation { } impl ScannedEmojiPresentation { - pub(crate) fn is_emoji(&self) -> bool { + pub(crate) fn is_emoji(self) -> bool { self.is_emoji } From 6f57530d5165ede9251f2acda216c3d66f8f0a84 Mon Sep 17 00:00:00 2001 From: Fangdun Tsai Date: Sun, 10 May 2026 00:47:45 +0800 Subject: [PATCH 18/34] adjust eq order --- parley/src/emoji/mod.rs | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 
19 deletions(-) diff --git a/parley/src/emoji/mod.rs b/parley/src/emoji/mod.rs index 7dd2425cb..38050e517 100644 --- a/parley/src/emoji/mod.rs +++ b/parley/src/emoji/mod.rs @@ -196,7 +196,7 @@ pub(crate) const fn scan_emoji_presentation( // // text_emoji_run_with_vs let is_text_emoji_presentation_sequence = - is_any_emoji && len >= 2 && EmojiSegmentationCategory::Vs15.eq(categories[1]); + is_any_emoji && len >= 2 && categories[1].eq(EmojiSegmentationCategory::Vs15); if is_text_emoji_presentation_sequence && len == 2 || is_text_emoji_keycap_sequence(categories) { return ScannedEmojiPresentation { @@ -222,7 +222,7 @@ pub(crate) const fn scan_emoji_presentation( let is_emoji_combining_enclosing_circle_backslash_sequence = is_any_emoji && len == 2 - && EmojiSegmentationCategory::CombiningEnclosingCircleBackslash.eq(categories[1]); + && categories[1].eq(EmojiSegmentationCategory::CombiningEnclosingCircleBackslash); if is_emoji_combining_enclosing_circle_backslash_sequence { return ScannedEmojiPresentation { is_emoji: true, @@ -247,7 +247,7 @@ pub(crate) const fn scan_emoji_presentation( // emoji_run_with_vs let is_emoji_presentation_sequence = - is_any_emoji && len >= 2 && EmojiSegmentationCategory::Vs16.eq(categories[1]); + is_any_emoji && len >= 2 && categories[1].eq(EmojiSegmentationCategory::Vs16); if (is_emoji_presentation_sequence && len == 2) || is_emoji_keycap_sequence(categories) { return ScannedEmojiPresentation { is_emoji: true, @@ -257,7 +257,7 @@ pub(crate) const fn scan_emoji_presentation( let is_emoji_modifier_sequence = is_emoji_modifier_base && len >= 2 - && EmojiSegmentationCategory::EmojiModifier.eq(categories[1]); + && categories[1].eq(EmojiSegmentationCategory::EmojiModifier); if is_emoji_modifier_sequence && len == 2 { return ScannedEmojiPresentation { is_emoji: true, @@ -292,14 +292,14 @@ pub(crate) const fn scan_emoji_presentation( if cursor + 1 < len { let is_emoji_presentation_sequence = - is_any_emoji && 
EmojiSegmentationCategory::Vs16.eq(categories[cursor + 1]); + is_any_emoji && categories[cursor + 1].eq(EmojiSegmentationCategory::Vs16); if is_emoji_presentation_sequence { cursor += 2; continue; } let is_emoji_modifier_sequence = is_emoji_modifier_base - && EmojiSegmentationCategory::EmojiModifier.eq(categories[cursor + 1]); + && categories[cursor + 1].eq(EmojiSegmentationCategory::EmojiModifier); if is_emoji_modifier_sequence { cursor += 2; continue; @@ -352,35 +352,35 @@ const fn emoji_matches(category: EmojiSegmentationCategory) -> (bool, bool, bool #[inline(always)] const fn is_text_emoji_keycap_sequence(categories: &[EmojiSegmentationCategory]) -> bool { categories.len() == 3 - && EmojiSegmentationCategory::KeycapBase.eq(categories[0]) - && EmojiSegmentationCategory::Vs15.eq(categories[1]) - && EmojiSegmentationCategory::CombiningEnclosingKeycap.eq(categories[2]) + && categories[0].eq(EmojiSegmentationCategory::KeycapBase) + && categories[1].eq(EmojiSegmentationCategory::Vs15) + && categories[2].eq(EmojiSegmentationCategory::CombiningEnclosingKeycap) } #[inline(always)] const fn is_emoji_keycap_sequence(categories: &[EmojiSegmentationCategory]) -> bool { categories.len() == 3 - && EmojiSegmentationCategory::KeycapBase.eq(categories[0]) - && EmojiSegmentationCategory::Vs16.eq(categories[1]) - && EmojiSegmentationCategory::CombiningEnclosingKeycap.eq(categories[2]) + && categories[0].eq(EmojiSegmentationCategory::KeycapBase) + && categories[1].eq(EmojiSegmentationCategory::Vs16) + && categories[2].eq(EmojiSegmentationCategory::CombiningEnclosingKeycap) } #[inline(always)] const fn is_emoji_flag_sequence(categories: &[EmojiSegmentationCategory]) -> bool { categories.len() == 2 - && EmojiSegmentationCategory::RegionalIndicator.eq(categories[0]) - && EmojiSegmentationCategory::RegionalIndicator.eq(categories[1]) + && categories[0].eq(EmojiSegmentationCategory::RegionalIndicator) + && categories[1].eq(EmojiSegmentationCategory::RegionalIndicator) } 
#[inline(always)] const fn is_emoji_tag_sequence(categories: &[EmojiSegmentationCategory]) -> bool { let is_tag_sequence = categories.len() >= 2 - && EmojiSegmentationCategory::TagBase.eq(categories[0]) - && EmojiSegmentationCategory::TagTerm.eq(categories[categories.len() - 1]); + && categories[0].eq(EmojiSegmentationCategory::TagBase) + && categories[categories.len() - 1].eq(EmojiSegmentationCategory::TagTerm); let mut i = 1; while i < categories.len() - 1 { - if !EmojiSegmentationCategory::TagSequence.eq(categories[i]) { + if !categories[i].eq(EmojiSegmentationCategory::TagSequence) { return false; } i += 1; @@ -392,6 +392,6 @@ const fn is_emoji_tag_sequence(categories: &[EmojiSegmentationCategory]) -> bool #[inline(always)] const fn is_unqualified_keycap_sequence(categories: &[EmojiSegmentationCategory]) -> bool { categories.len() == 2 - && EmojiSegmentationCategory::KeycapBase.eq(categories[0]) - && EmojiSegmentationCategory::CombiningEnclosingKeycap.eq(categories[1]) + && categories[0].eq(EmojiSegmentationCategory::KeycapBase) + && categories[1].eq(EmojiSegmentationCategory::CombiningEnclosingKeycap) } From 80f8e773abd31ea584599df78c2806e1f0e056fb Mon Sep 17 00:00:00 2001 From: Fangdun Tsai Date: Sun, 10 May 2026 00:56:12 +0800 Subject: [PATCH 19/34] adjust eq order --- parley/src/emoji/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parley/src/emoji/mod.rs b/parley/src/emoji/mod.rs index 38050e517..ea283ac51 100644 --- a/parley/src/emoji/mod.rs +++ b/parley/src/emoji/mod.rs @@ -285,7 +285,7 @@ pub(crate) const fn scan_emoji_presentation( // // emoji_zwj_element = emoji_presentation_sequence | emoji_modifier_sequence | any_emoji // emoji_zwj_element (zwj emoji_zwj_element)+ - while cursor < len && EmojiSegmentationCategory::Zwj.eq(categories[cursor]) { + while cursor < len && categories[cursor].eq(EmojiSegmentationCategory::Zwj) { cursor += 1; let (is_any_emoji, is_emoji_modifier_base, _) = emoji_matches(categories[cursor]); From 
9bc070146e8cf5448ff3823040f79d412555dc68 Mon Sep 17 00:00:00 2001 From: Fangdun Tsai Date: Sun, 10 May 2026 01:36:44 +0800 Subject: [PATCH 20/34] fix copyright year --- parley/src/tests/test_emoji_segmenters.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parley/src/tests/test_emoji_segmenters.rs b/parley/src/tests/test_emoji_segmenters.rs index 4f971d0cd..bb8acafd1 100644 --- a/parley/src/tests/test_emoji_segmenters.rs +++ b/parley/src/tests/test_emoji_segmenters.rs @@ -1,4 +1,4 @@ -// Copyright 2025 the Parley Authors +// Copyright 2026 the Parley Authors // SPDX-License-Identifier: Apache-2.0 OR MIT use core::char; From b1f20eb28ca29b9775bea4a8f8fc21f084a7f0a9 Mon Sep 17 00:00:00 2001 From: Fangdun Tsai Date: Sun, 10 May 2026 10:42:22 +0800 Subject: [PATCH 21/34] add comments --- parley/src/emoji/mod.rs | 183 +++++++++++++++++++++++----------------- parley/src/shape/mod.rs | 4 +- 2 files changed, 108 insertions(+), 79 deletions(-) diff --git a/parley/src/emoji/mod.rs b/parley/src/emoji/mod.rs index ea283ac51..acc6505b0 100644 --- a/parley/src/emoji/mod.rs +++ b/parley/src/emoji/mod.rs @@ -1,14 +1,15 @@ // Copyright 2026 the Parley Authors // SPDX-License-Identifier: Apache-2.0 OR MIT -//! The implementation is based on [Emoji Segmenter]'s Ragel grammar. +//! This implementation is based on [Emoji Segmenter]'s Ragel grammar (Apache-2.0). //! -//! Follow the [UTS51](Unicode Technical Standard #51). +//! And follow the [UTS51](Unicode Technical Standard #51). //! //! [Emoji Segmenter]: //! [UTS51]: -#[derive(Clone, Copy, Default, Debug)] +/// Flags are used to identify [`EmojiSegmentationCategory`]. 
+#[derive(Clone, Copy, Default)] pub(crate) struct EmojiFlags(u32); impl EmojiFlags { @@ -27,18 +28,18 @@ impl EmojiFlags { const EMOJI_COMPONENT_MASK: u32 = 1 << Self::EMOJI_COMPONENT_SHIFT; const REGIONAL_INDICATOR_MASK: u32 = 1 << Self::REGIONAL_INDICATOR_SHIFT; - #[inline(always)] + #[inline] pub(crate) const fn new() -> Self { Self(0) } - #[inline(always)] + #[inline] pub(crate) const fn with_emoji(mut self, is_emoji: bool) -> Self { self.0 |= (is_emoji as u32) << Self::EMOJI_SHIFT; self } - #[inline(always)] + #[inline] pub(crate) const fn with_extra( mut self, is_emoji_modifier: bool, @@ -55,40 +56,41 @@ impl EmojiFlags { self } - #[inline(always)] + #[inline] pub(crate) const fn is_emoji(self) -> bool { self.0 & Self::EMOJI_MASK != 0 } - #[inline(always)] + #[inline] pub(crate) const fn is_emoji_modifier(self) -> bool { self.0 & Self::EMOJI_MODIFIER_MASK != 0 } - #[inline(always)] + #[inline] pub(crate) const fn is_emoji_modifier_base(self) -> bool { self.0 & Self::EMOJI_MODIFIER_BASE_MASK != 0 } - #[inline(always)] + #[inline] pub(crate) const fn is_emoji_presentation(self) -> bool { self.0 & Self::EMOJI_PRESENTATION_MASK != 0 } #[allow(unused)] - #[inline(always)] + #[inline] pub(crate) const fn is_emoji_component(self) -> bool { self.0 & Self::EMOJI_COMPONENT_MASK != 0 } - #[inline(always)] + #[inline] pub(crate) const fn is_regional_indicator(self) -> bool { self.0 & Self::REGIONAL_INDICATOR_MASK != 0 } } +/// Represents the category of an emoji segmentation. #[repr(u8)] -#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[derive(Clone, Copy, Debug, PartialEq)] pub(crate) enum EmojiSegmentationCategory { Emoji = 0, EmojiTextPresentation, @@ -110,11 +112,12 @@ pub(crate) enum EmojiSegmentationCategory { } impl EmojiSegmentationCategory { - #[inline(always)] + /// Returns the category of the given codepoint and flags. 
+ #[inline] pub(crate) const fn from_codepoint(cp: u32, flags: EmojiFlags) -> Self { match cp { // '0'..'9', '#', '*' - 0x30..=0x39 | 0x23 | 0x2a => Self::KeycapBase, + 0x30..=0x39 | 0x23 | 0x2A => Self::KeycapBase, 0x200D => Self::Zwj, 0x20E0 => Self::CombiningEnclosingCircleBackslash, 0x20E3 => Self::CombiningEnclosingKeycap, @@ -160,6 +163,7 @@ impl EmojiSegmentationCategory { } } +/// Used to control the presentation style of the emoji. #[derive(Clone, Copy, Default, PartialEq, Eq, Debug)] pub(crate) struct ScannedEmojiPresentation { pub is_emoji: bool, @@ -167,16 +171,21 @@ pub(crate) struct ScannedEmojiPresentation { } impl ScannedEmojiPresentation { + /// Returns true if the scanned sequence is an emoji presentation. pub(crate) fn is_emoji(self) -> bool { self.is_emoji } + /// Clears the emoji presentation state. pub(crate) fn clear(&mut self) { self.is_emoji = false; self.has_vs = false; } } +/// Scan the given categories for an emoji presentation sequence. +/// +/// Returns a [`ScannedEmojiPresentation`] indicating whether the sequence is an emoji presentation. 
pub(crate) const fn scan_emoji_presentation( categories: &[EmojiSegmentationCategory], ) -> ScannedEmojiPresentation { @@ -192,8 +201,6 @@ pub(crate) const fn scan_emoji_presentation( let (is_any_emoji, is_emoji_modifier_base, is_emoji_presentation) = emoji_matches(categories[0]); - // In order to give the the VS15 sequences higher priority than detecting - // // text_emoji_run_with_vs let is_text_emoji_presentation_sequence = is_any_emoji && len >= 2 && categories[1].eq(EmojiSegmentationCategory::Vs15); @@ -213,13 +220,6 @@ pub(crate) const fn scan_emoji_presentation( }; } - if is_unqualified_keycap_sequence(categories) { - return ScannedEmojiPresentation { - is_emoji: true, - has_vs: false, - }; - } - let is_emoji_combining_enclosing_circle_backslash_sequence = is_any_emoji && len == 2 && categories[1].eq(EmojiSegmentationCategory::CombiningEnclosingCircleBackslash); @@ -237,7 +237,6 @@ pub(crate) const fn scan_emoji_presentation( }; } - // TAG_BASE TAG_SEQUENCE+ TAG_TERM; if is_emoji_tag_sequence(categories) { return ScannedEmojiPresentation { is_emoji: true, @@ -265,7 +264,7 @@ pub(crate) const fn scan_emoji_presentation( }; } - let mut cursor = if is_emoji_presentation_sequence || is_emoji_modifier_sequence { + let cursor = if is_emoji_presentation_sequence || is_emoji_modifier_sequence { 2 } else if is_any_emoji { 1 @@ -281,75 +280,60 @@ pub(crate) const fn scan_emoji_presentation( }; } - // zwj sequences - // - // emoji_zwj_element = emoji_presentation_sequence | emoji_modifier_sequence | any_emoji - // emoji_zwj_element (zwj emoji_zwj_element)+ - while cursor < len && categories[cursor].eq(EmojiSegmentationCategory::Zwj) { - cursor += 1; - - let (is_any_emoji, is_emoji_modifier_base, _) = emoji_matches(categories[cursor]); - - if cursor + 1 < len { - let is_emoji_presentation_sequence = - is_any_emoji && categories[cursor + 1].eq(EmojiSegmentationCategory::Vs16); - if is_emoji_presentation_sequence { - cursor += 2; - continue; - } - - let 
is_emoji_modifier_sequence = is_emoji_modifier_base - && categories[cursor + 1].eq(EmojiSegmentationCategory::EmojiModifier); - if is_emoji_modifier_sequence { - cursor += 2; - continue; - } - } + if is_emoji_zwj_sequence(categories, cursor) { + return ScannedEmojiPresentation { + is_emoji: true, + has_vs: false, + }; + } - if is_any_emoji { - cursor += 1; - continue; - } + if is_unqualified_keycap_sequence(categories) { + return ScannedEmojiPresentation { + is_emoji: true, + has_vs: false, + }; } ScannedEmojiPresentation { - is_emoji: cursor == len || is_emoji_presentation_sequence || is_emoji_modifier_sequence, + is_emoji: is_emoji_presentation_sequence || is_emoji_modifier_sequence, has_vs: false, } } /// Extracts the emoji category flags from the given category. /// -/// `is_any_emoji`: +/// ``` +/// - `is_any_emoji`: /// `EmojiTextPresentation` | `EmojiEmojiPresentation` | `KeycapBase` | /// `EmojiModifierBaseText` | `EmojiModifierBaseEmoji` | `TagBase` | `Emoji` /// -/// `is_emoji_modifier_base`: `EmojiModifierBaseText` | `EmojiModifierBaseEmoji` +/// - `is_emoji_modifier_base`: `EmojiModifierBaseText` | `EmojiModifierBaseEmoji` /// -/// `is_emoji_presentation`: +/// - `is_emoji_presentation`: /// `EmojiEmojiPresentation` | `TagBase` | `EmojiModifierBaseEmoji` | /// `EmojiModifier` | `RegionalIndicator` +/// ``` +/// +/// Returns a tuple: `(is_any_emoji, is_emoji_modifier_base, is_emoji_presentation)`. 
/// -/// Returns `(is_any_emoji, is_emoji_modifier_base, is_emoji_presentation)` -#[inline(always)] +/// +#[inline] const fn emoji_matches(category: EmojiSegmentationCategory) -> (bool, bool, bool) { use EmojiSegmentationCategory::*; - match category { EmojiTextPresentation | KeycapBase | Emoji => (true, false, false), - EmojiEmojiPresentation | TagBase => (true, false, true), - EmojiModifierBaseText => (true, true, false), EmojiModifierBaseEmoji => (true, true, true), - EmojiModifier | RegionalIndicator => (false, false, true), - _ => (false, false, false), } } -#[inline(always)] +/// Text emoji keycap sequence. +/// +/// This is a special case of text emoji presentation sequence. +#[inline] const fn is_text_emoji_keycap_sequence(categories: &[EmojiSegmentationCategory]) -> bool { categories.len() == 3 && categories[0].eq(EmojiSegmentationCategory::KeycapBase) @@ -357,22 +341,20 @@ const fn is_text_emoji_keycap_sequence(categories: &[EmojiSegmentationCategory]) && categories[2].eq(EmojiSegmentationCategory::CombiningEnclosingKeycap) } -#[inline(always)] -const fn is_emoji_keycap_sequence(categories: &[EmojiSegmentationCategory]) -> bool { - categories.len() == 3 - && categories[0].eq(EmojiSegmentationCategory::KeycapBase) - && categories[1].eq(EmojiSegmentationCategory::Vs16) - && categories[2].eq(EmojiSegmentationCategory::CombiningEnclosingKeycap) -} - -#[inline(always)] +/// Emoji flag sequence. +/// +/// +#[inline] const fn is_emoji_flag_sequence(categories: &[EmojiSegmentationCategory]) -> bool { categories.len() == 2 && categories[0].eq(EmojiSegmentationCategory::RegionalIndicator) && categories[1].eq(EmojiSegmentationCategory::RegionalIndicator) } -#[inline(always)] +/// Emoji tag sequence (ETS). 
+/// +/// +#[inline] const fn is_emoji_tag_sequence(categories: &[EmojiSegmentationCategory]) -> bool { let is_tag_sequence = categories.len() >= 2 && categories[0].eq(EmojiSegmentationCategory::TagBase) @@ -389,7 +371,54 @@ const fn is_emoji_tag_sequence(categories: &[EmojiSegmentationCategory]) -> bool is_tag_sequence } -#[inline(always)] +/// Emoji keycap sequence. +/// +/// +#[inline] +const fn is_emoji_keycap_sequence(categories: &[EmojiSegmentationCategory]) -> bool { + categories.len() == 3 + && categories[0].eq(EmojiSegmentationCategory::KeycapBase) + && categories[1].eq(EmojiSegmentationCategory::Vs16) + && categories[2].eq(EmojiSegmentationCategory::CombiningEnclosingKeycap) +} + +/// Emoji ZWJ sequence. +/// +/// +const fn is_emoji_zwj_sequence( + categories: &[EmojiSegmentationCategory], + mut cursor: usize, +) -> bool { + while cursor + 1 < categories.len() && categories[cursor].eq(EmojiSegmentationCategory::Zwj) { + cursor += 1; + + let (is_any_emoji, is_emoji_modifier_base, _) = emoji_matches(categories[cursor]); + + if cursor + 1 < categories.len() { + let is_emoji_presentation_sequence = + is_any_emoji && categories[cursor + 1].eq(EmojiSegmentationCategory::Vs16); + if is_emoji_presentation_sequence { + cursor += 2; + continue; + } + + let is_emoji_modifier_sequence = is_emoji_modifier_base + && categories[cursor + 1].eq(EmojiSegmentationCategory::EmojiModifier); + if is_emoji_modifier_sequence { + cursor += 2; + continue; + } + } + + if is_any_emoji { + cursor += 1; + } + } + + cursor == categories.len() +} + +#[inline] const fn is_unqualified_keycap_sequence(categories: &[EmojiSegmentationCategory]) -> bool { categories.len() == 2 && categories[0].eq(EmojiSegmentationCategory::KeycapBase) diff --git a/parley/src/shape/mod.rs b/parley/src/shape/mod.rs index c19d21912..bed614cb7 100644 --- a/parley/src/shape/mod.rs +++ b/parley/src/shape/mod.rs @@ -276,8 +276,8 @@ fn fill_cluster_in_place( ), ); - is_emoji_presentation_selector = 
EmojiSegmentationCategory::Vs16.eq(&category) - || EmojiSegmentationCategory::Vs15.eq(&category); + is_emoji_presentation_selector = category.eq(&EmojiSegmentationCategory::Vs16) + || category.eq(&EmojiSegmentationCategory::Vs15); emoji_segmentations.push(category); } From 09e21ee919c52c05a1707e8de31a4cea59128200 Mon Sep 17 00:00:00 2001 From: Fangdun Tsai Date: Sun, 10 May 2026 10:53:23 +0800 Subject: [PATCH 22/34] add comments --- parley/src/emoji/mod.rs | 6 ++---- parley/src/tests/test_emoji_segmenters.rs | 4 ++++ 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/parley/src/emoji/mod.rs b/parley/src/emoji/mod.rs index acc6505b0..0c6084059 100644 --- a/parley/src/emoji/mod.rs +++ b/parley/src/emoji/mod.rs @@ -1,11 +1,11 @@ // Copyright 2026 the Parley Authors // SPDX-License-Identifier: Apache-2.0 OR MIT -//! This implementation is based on [Emoji Segmenter]'s Ragel grammar (Apache-2.0). +//! This implementation is based on [emoji segmenter]'s Ragel grammar (Apache-2.0). //! //! And follow the [UTS51](Unicode Technical Standard #51). //! -//! [Emoji Segmenter]: +//! [emoji segmenter]: //! [UTS51]: /// Flags are used to identify [`EmojiSegmentationCategory`]. @@ -302,7 +302,6 @@ pub(crate) const fn scan_emoji_presentation( /// Extracts the emoji category flags from the given category. /// -/// ``` /// - `is_any_emoji`: /// `EmojiTextPresentation` | `EmojiEmojiPresentation` | `KeycapBase` | /// `EmojiModifierBaseText` | `EmojiModifierBaseEmoji` | `TagBase` | `Emoji` @@ -312,7 +311,6 @@ pub(crate) const fn scan_emoji_presentation( /// - `is_emoji_presentation`: /// `EmojiEmojiPresentation` | `TagBase` | `EmojiModifierBaseEmoji` | /// `EmojiModifier` | `RegionalIndicator` -/// ``` /// /// Returns a tuple: `(is_any_emoji, is_emoji_modifier_base, is_emoji_presentation)`. 
/// diff --git a/parley/src/tests/test_emoji_segmenters.rs b/parley/src/tests/test_emoji_segmenters.rs index bb8acafd1..b45a34526 100644 --- a/parley/src/tests/test_emoji_segmenters.rs +++ b/parley/src/tests/test_emoji_segmenters.rs @@ -1,6 +1,10 @@ // Copyright 2026 the Parley Authors // SPDX-License-Identifier: Apache-2.0 OR MIT +//! Tests extracted from the [emoji segmenter]. +//! +//! [emoji segmenter]: + use core::char; use std::vec::Vec; From d200e15e638acf0a00198d8cc9ad848157bef2fa Mon Sep 17 00:00:00 2001 From: Fangdun Tsai Date: Sun, 10 May 2026 10:58:44 +0800 Subject: [PATCH 23/34] fix doc --- parley/src/emoji/mod.rs | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/parley/src/emoji/mod.rs b/parley/src/emoji/mod.rs index 0c6084059..3babae0a3 100644 --- a/parley/src/emoji/mod.rs +++ b/parley/src/emoji/mod.rs @@ -113,6 +113,8 @@ pub(crate) enum EmojiSegmentationCategory { impl EmojiSegmentationCategory { /// Returns the category of the given codepoint and flags. + /// + /// #[inline] pub(crate) const fn from_codepoint(cp: u32, flags: EmojiFlags) -> Self { match cp { @@ -163,7 +165,7 @@ impl EmojiSegmentationCategory { } } -/// Used to control the presentation style of the emoji. +/// Used to control the presentation style of an emoji. #[derive(Clone, Copy, Default, PartialEq, Eq, Debug)] pub(crate) struct ScannedEmojiPresentation { pub is_emoji: bool, @@ -303,14 +305,14 @@ pub(crate) const fn scan_emoji_presentation( /// Extracts the emoji category flags from the given category. 
/// /// - `is_any_emoji`: -/// `EmojiTextPresentation` | `EmojiEmojiPresentation` | `KeycapBase` | -/// `EmojiModifierBaseText` | `EmojiModifierBaseEmoji` | `TagBase` | `Emoji` +/// `EmojiTextPresentation` | `EmojiEmojiPresentation` | `KeycapBase` | +/// `EmojiModifierBaseText` | `EmojiModifierBaseEmoji` | `TagBase` | `Emoji` /// /// - `is_emoji_modifier_base`: `EmojiModifierBaseText` | `EmojiModifierBaseEmoji` /// /// - `is_emoji_presentation`: -/// `EmojiEmojiPresentation` | `TagBase` | `EmojiModifierBaseEmoji` | -/// `EmojiModifier` | `RegionalIndicator` +/// `EmojiEmojiPresentation` | `TagBase` | `EmojiModifierBaseEmoji` | +/// `EmojiModifier` | `RegionalIndicator` /// /// Returns a tuple: `(is_any_emoji, is_emoji_modifier_base, is_emoji_presentation)`. /// From 953b93a2633190124dab0f4c2ef40211996b078d Mon Sep 17 00:00:00 2001 From: Fangdun Tsai Date: Sun, 10 May 2026 11:04:45 +0800 Subject: [PATCH 24/34] fix doc link --- parley/src/tests/test_emoji_segmenters.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parley/src/tests/test_emoji_segmenters.rs b/parley/src/tests/test_emoji_segmenters.rs index b45a34526..3d377af55 100644 --- a/parley/src/tests/test_emoji_segmenters.rs +++ b/parley/src/tests/test_emoji_segmenters.rs @@ -3,7 +3,7 @@ //! Tests extracted from the [emoji segmenter]. //! -//! [emoji segmenter]: +//! 
[emoji segmenter]: use core::char; use std::vec::Vec; From 0a29ecc37087458cc46be5c21a33cc2329048d5b Mon Sep 17 00:00:00 2001 From: Fangdun Tsai Date: Sun, 10 May 2026 11:09:54 +0800 Subject: [PATCH 25/34] fast path --- parley/src/emoji/mod.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/parley/src/emoji/mod.rs b/parley/src/emoji/mod.rs index 3babae0a3..426e76755 100644 --- a/parley/src/emoji/mod.rs +++ b/parley/src/emoji/mod.rs @@ -360,6 +360,10 @@ const fn is_emoji_tag_sequence(categories: &[EmojiSegmentationCategory]) -> bool && categories[0].eq(EmojiSegmentationCategory::TagBase) && categories[categories.len() - 1].eq(EmojiSegmentationCategory::TagTerm); + if !is_tag_sequence { + return false; + } + let mut i = 1; while i < categories.len() - 1 { if !categories[i].eq(EmojiSegmentationCategory::TagSequence) { From c65739205fcf7ed79b890450642455badfef7688 Mon Sep 17 00:00:00 2001 From: Fangdun Tsai Date: Sun, 10 May 2026 12:57:58 +0800 Subject: [PATCH 26/34] add const --- parley/src/emoji/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/parley/src/emoji/mod.rs b/parley/src/emoji/mod.rs index 426e76755..60b7216b6 100644 --- a/parley/src/emoji/mod.rs +++ b/parley/src/emoji/mod.rs @@ -174,12 +174,12 @@ pub(crate) struct ScannedEmojiPresentation { impl ScannedEmojiPresentation { /// Returns true if the scanned sequence is an emoji presentation. - pub(crate) fn is_emoji(self) -> bool { + pub(crate) const fn is_emoji(self) -> bool { self.is_emoji } /// Clears the emoji presentation state. 
- pub(crate) fn clear(&mut self) { + pub(crate) const fn clear(&mut self) { self.is_emoji = false; self.has_vs = false; } From 2cf90eb916a21a187eb464239be572d800ae30ec Mon Sep 17 00:00:00 2001 From: Fangdun Tsai Date: Sun, 10 May 2026 19:52:56 +0800 Subject: [PATCH 27/34] fix tag sequence range --- parley/src/emoji/mod.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/parley/src/emoji/mod.rs b/parley/src/emoji/mod.rs index 60b7216b6..09cd778c7 100644 --- a/parley/src/emoji/mod.rs +++ b/parley/src/emoji/mod.rs @@ -126,7 +126,7 @@ impl EmojiSegmentationCategory { 0xFE0E => Self::Vs15, 0xFE0F => Self::Vs16, 0x1F3F4 => Self::TagBase, - 0xE0030..=0xE0039 | 0xE0061..0xE007A => Self::TagSequence, + 0xE0030..=0xE0039 | 0xE0061..=0xE007A => Self::TagSequence, 0xE007F => Self::TagTerm, _ => { if flags.is_emoji_modifier_base() { @@ -206,7 +206,8 @@ pub(crate) const fn scan_emoji_presentation( // text_emoji_run_with_vs let is_text_emoji_presentation_sequence = is_any_emoji && len >= 2 && categories[1].eq(EmojiSegmentationCategory::Vs15); - if is_text_emoji_presentation_sequence && len == 2 || is_text_emoji_keycap_sequence(categories) + if (is_text_emoji_presentation_sequence && len == 2) + || is_text_emoji_keycap_sequence(categories) { return ScannedEmojiPresentation { is_emoji: false, From d1561cafc5a9e051bddc365f666e6684c23013f5 Mon Sep 17 00:00:00 2001 From: Fangdun Tsai Date: Tue, 12 May 2026 20:01:48 +0800 Subject: [PATCH 28/34] impl Emoji DFA --- parley/src/analysis/cluster.rs | 6 +- parley/src/emoji/dfa.rs | 250 +++++++++++++ parley/src/emoji/mod.rs | 424 +--------------------- parley/src/emoji/types.rs | 277 ++++++++++++++ parley/src/shape/mod.rs | 14 +- parley/src/tests/test_emoji_segmenters.rs | 213 +++-------- 6 files changed, 598 insertions(+), 586 deletions(-) create mode 100644 parley/src/emoji/dfa.rs create mode 100644 parley/src/emoji/types.rs diff --git a/parley/src/analysis/cluster.rs b/parley/src/analysis/cluster.rs index 
a8a9d4686..9890faec0 100644 --- a/parley/src/analysis/cluster.rs +++ b/parley/src/analysis/cluster.rs @@ -4,7 +4,7 @@ use alloc::vec::Vec; use icu_normalizer::properties::Decomposed; -use crate::{analysis::AnalysisDataSources, emoji::ScannedEmojiPresentation}; +use crate::{analysis::AnalysisDataSources, emoji::EmojiPresentationStyle}; /// The maximum number of characters in a single cluster. const MAX_CLUSTER_SIZE: usize = 32; @@ -16,7 +16,7 @@ pub(crate) struct CharCluster { pub start: u32, pub end: u32, pub force_normalize: bool, - pub scanned_emoji_presentation: ScannedEmojiPresentation, + pub emoji_presentation_style: EmojiPresentationStyle, comp: Form, decomp: Form, form: FormKind, @@ -103,7 +103,7 @@ impl CharCluster { self.decomp.clear(); self.form = FormKind::Original; self.best_ratio = 0.; - self.scanned_emoji_presentation.clear(); + self.emoji_presentation_style = EmojiPresentationStyle::Default; } #[inline(always)] diff --git a/parley/src/emoji/dfa.rs b/parley/src/emoji/dfa.rs new file mode 100644 index 000000000..edd860f0c --- /dev/null +++ b/parley/src/emoji/dfa.rs @@ -0,0 +1,250 @@ +use crate::emoji::types::EmojiSequence; + +use super::types::{EmojiPresentationStyle, EmojiSegmentationCategory, EmojiState}; + +/// The transition table for Emoji DFA. 
+/// +/// +static DFA_TRANS: [[EmojiState; 16]; 15] = { + use EmojiSegmentationCategory as Category; + use EmojiState as State; + + let mut t = [[State::Reject; 16]; 15]; + + { + t[State::Start.as_usize()][Category::None.as_usize()] = State::Start; + t[State::Start.as_usize()][Category::KeycapTerm.as_usize()] = State::Start; + t[State::Start.as_usize()][Category::Zwj.as_usize()] = State::Start; + t[State::Start.as_usize()][Category::Vs15.as_usize()] = State::Start; + t[State::Start.as_usize()][Category::Vs16.as_usize()] = State::Start; + t[State::Start.as_usize()][Category::TagSpec.as_usize()] = State::Start; + t[State::Start.as_usize()][Category::TagTerm.as_usize()] = State::Start; + } + + // Text and Emoji presentation sequences + { + t[State::Start.as_usize()][Category::Emoji.as_usize()] = State::Emoji; + + t[State::Start.as_usize()][Category::EmojiTextPresentation.as_usize()] = State::Emoji; + t[State::Start.as_usize()][Category::EmojiEmojiPresentation.as_usize()] = State::Emoji; + + // Text presentation sequence + // + // + t[State::Emoji.as_usize()][Category::Vs15.as_usize()] = State::Terminal; + + // Emoji presentation sequence + // + // + t[State::Emoji.as_usize()][Category::Vs16.as_usize()] = State::OptionalZwj; + + // ZWJ + t[State::Emoji.as_usize()][Category::Zwj.as_usize()] = State::Zwj; + } + + // Emoji modifier sequence + // + // + { + // text + t[State::Start.as_usize()][Category::EmojiModifierBaseText.as_usize()] = + State::EmojiModifierBaseText; + + t[State::EmojiModifierBaseText.as_usize()][Category::Vs15.as_usize()] = State::Terminal; + t[State::EmojiModifierBaseText.as_usize()][Category::Vs16.as_usize()] = State::Terminal; + t[State::EmojiModifierBaseText.as_usize()][Category::EmojiModifier.as_usize()] = + State::OptionalZwj; + + // emoji + t[State::Start.as_usize()][Category::EmojiModifierBaseEmoji.as_usize()] = + State::EmojiModifierBaseEmoji; + + t[State::EmojiModifierBaseEmoji.as_usize()][Category::Vs16.as_usize()] = State::OptionalZwj; + 
t[State::EmojiModifierBaseEmoji.as_usize()][Category::Zwj.as_usize()] = State::Zwj; + t[State::EmojiModifierBaseEmoji.as_usize()][Category::EmojiModifier.as_usize()] = + State::OptionalZwj; + + // other + t[State::Start.as_usize()][Category::EmojiModifier.as_usize()] = State::Terminal; + } + + // Emoji flag sequence -- A sequence of two Regional Indicator characters. + // + // + { + t[State::Start.as_usize()][Category::Ri.as_usize()] = State::Ri; + + t[State::Ri.as_usize()][Category::Ri.as_usize()] = State::Terminal; + } + + // Emoji tag sequence (ETS). + // + // + { + t[State::Start.as_usize()][Category::TagBase.as_usize()] = State::TagBase; + + t[State::TagBase.as_usize()][Category::Vs15.as_usize()] = State::Terminal; + t[State::TagBase.as_usize()][Category::Vs16.as_usize()] = State::OptionalZwj; + t[State::TagBase.as_usize()][Category::TagSpec.as_usize()] = State::TagSpec; + t[State::TagBase.as_usize()][Category::TagTerm.as_usize()] = State::TagEmpty; // without any `TagSpec` + t[State::TagBase.as_usize()][Category::Zwj.as_usize()] = State::Zwj; + + // (seq)+ + t[State::TagSpec.as_usize()][Category::TagSpec.as_usize()] = State::TagSpec; + t[State::TagSpec.as_usize()][Category::TagTerm.as_usize()] = State::Terminal; + } + + // Emoji keycap sequence. + // + // + { + t[State::Start.as_usize()][Category::KeycapBase.as_usize()] = State::KeycapBase; + + t[State::KeycapBase.as_usize()][Category::KeycapTerm.as_usize()] = State::Terminal; + t[State::KeycapBase.as_usize()][Category::Vs15.as_usize()] = State::KeycapVs; + t[State::KeycapBase.as_usize()][Category::Vs16.as_usize()] = State::KeycapVs; + + t[State::KeycapVs.as_usize()][Category::KeycapTerm.as_usize()] = State::Terminal; + } + + // Emoji ZWJ sequence. 
+ // + // + { + t[State::OptionalZwj.as_usize()][Category::Zwj.as_usize()] = State::Zwj; + + // (zwj emoji_zwj_element)+ + t[State::Zwj.as_usize()][Category::Emoji.as_usize()] = State::Emoji; + t[State::Zwj.as_usize()][Category::EmojiEmojiPresentation.as_usize()] = State::Emoji; + t[State::Zwj.as_usize()][Category::EmojiModifierBaseEmoji.as_usize()] = + State::EmojiModifierBaseEmoji; + } + + t +}; + +#[derive(Clone, Copy, Debug)] +pub(crate) struct EmojiDFA { + state: EmojiState, + // [state, category] + recorded: [u16; 2], +} + +impl EmojiDFA { + const DEFAULT: Self = Self { + state: EmojiState::Start, + recorded: [0, 0], + }; + + pub(crate) const fn new() -> Self { + Self::DEFAULT + } + + pub(crate) const fn step(&mut self, category: EmojiSegmentationCategory) { + self.state = DFA_TRANS[self.state.as_usize()][category.as_usize()]; + } + + // pub(crate) const fn step_record(&mut self, category: EmojiSegmentationCategory) { + pub(crate) fn step_record(&mut self, category: EmojiSegmentationCategory) { + self.step(category); + + if self.is_rejected() || self.is_started() { + return; + } + + self.recorded[0] |= 1 << self.state.as_u8(); + self.recorded[1] |= 1 << category.as_u8(); + } + + pub(crate) const fn is_rejected(&self) -> bool { + self.state.eq(EmojiState::Reject) + } + + pub(crate) const fn is_started(&self) -> bool { + self.state.eq(EmojiState::Start) + } + + #[allow(unused)] + pub(crate) const fn is_accepting(&self) -> bool { + const START: u8 = EmojiState::Terminal.as_u8(); + const END: u8 = EmojiState::Ri.as_u8(); + + let cur = self.state.as_u8(); + + START <= cur && cur <= END + } + + pub(crate) const fn contains_state(&self, state: EmojiState) -> bool { + self.recorded[0] & (1 << state.as_u8()) != 0 + } + + pub(crate) const fn contains_category(&self, category: EmojiSegmentationCategory) -> bool { + self.recorded[1] & (1 << category.as_u8()) != 0 + } + + pub(crate) const fn sequence(&self) -> EmojiSequence { + if 
self.contains_category(EmojiSegmentationCategory::Zwj) { + return EmojiSequence::Zwj; + } + + if self.contains_state(EmojiState::TagBase) + && self.contains_state(EmojiState::Terminal) + && !self.contains_category(EmojiSegmentationCategory::Vs15) + { + return EmojiSequence::Tag; + } + + if self.contains_state(EmojiState::Ri) && self.contains_state(EmojiState::Terminal) { + return EmojiSequence::Flag; + } + + if (self.contains_category(EmojiSegmentationCategory::EmojiModifierBaseEmoji) + || self.contains_category(EmojiSegmentationCategory::EmojiModifierBaseText)) + && self.contains_category(EmojiSegmentationCategory::EmojiModifier) + { + return EmojiSequence::Modifier; + } + + if self.contains_category(EmojiSegmentationCategory::KeycapBase) + && self.contains_category(EmojiSegmentationCategory::Vs16) + && self.contains_category(EmojiSegmentationCategory::KeycapTerm) + { + return EmojiSequence::Keycap; + } + + if self.contains_category(EmojiSegmentationCategory::KeycapTerm) + && self.contains_category(EmojiSegmentationCategory::Vs16) + { + return EmojiSequence::Keycap; + } + + EmojiSequence::Basic + } + + pub(crate) const fn presentation_style(&self) -> EmojiPresentationStyle { + if self.contains_category(EmojiSegmentationCategory::Vs15) { + return EmojiPresentationStyle::Text; + } + if self.contains_category(EmojiSegmentationCategory::Vs16) { + return EmojiPresentationStyle::Emoji; + } + + if self.contains_category(EmojiSegmentationCategory::EmojiTextPresentation) { + return EmojiPresentationStyle::Text; + } + if self.contains_category(EmojiSegmentationCategory::EmojiEmojiPresentation) { + return EmojiPresentationStyle::Emoji; + } + + if !self.sequence().eq(EmojiSequence::Basic) { + return EmojiPresentationStyle::Emoji; + } + + // single emoji character + if self.contains_category(EmojiSegmentationCategory::EmojiModifierBaseText) { + return EmojiPresentationStyle::Text; + } + + EmojiPresentationStyle::Default + } +} diff --git a/parley/src/emoji/mod.rs 
b/parley/src/emoji/mod.rs index 09cd778c7..26e9ec779 100644 --- a/parley/src/emoji/mod.rs +++ b/parley/src/emoji/mod.rs @@ -8,424 +8,8 @@ //! [emoji segmenter]: //! [UTS51]: -/// Flags are used to identify [`EmojiSegmentationCategory`]. -#[derive(Clone, Copy, Default)] -pub(crate) struct EmojiFlags(u32); +mod dfa; +mod types; -impl EmojiFlags { - const EMOJI_SHIFT: u32 = 0; - const EMOJI_MODIFIER_SHIFT: u32 = 1; - const EMOJI_MODIFIER_BASE_SHIFT: u32 = 2; - const EMOJI_PRESENTATION_SHIFT: u32 = 3; - const EMOJI_COMPONENT_SHIFT: u32 = 4; - const REGIONAL_INDICATOR_SHIFT: u32 = 5; - - const EMOJI_MASK: u32 = 1 << Self::EMOJI_SHIFT; - const EMOJI_MODIFIER_MASK: u32 = 1 << Self::EMOJI_MODIFIER_SHIFT; - const EMOJI_MODIFIER_BASE_MASK: u32 = 1 << Self::EMOJI_MODIFIER_BASE_SHIFT; - const EMOJI_PRESENTATION_MASK: u32 = 1 << Self::EMOJI_PRESENTATION_SHIFT; - #[allow(unused)] - const EMOJI_COMPONENT_MASK: u32 = 1 << Self::EMOJI_COMPONENT_SHIFT; - const REGIONAL_INDICATOR_MASK: u32 = 1 << Self::REGIONAL_INDICATOR_SHIFT; - - #[inline] - pub(crate) const fn new() -> Self { - Self(0) - } - - #[inline] - pub(crate) const fn with_emoji(mut self, is_emoji: bool) -> Self { - self.0 |= (is_emoji as u32) << Self::EMOJI_SHIFT; - self - } - - #[inline] - pub(crate) const fn with_extra( - mut self, - is_emoji_modifier: bool, - is_emoji_modifier_base: bool, - is_emoji_presentation: bool, - is_emoji_component: bool, - is_regional_indicator: bool, - ) -> Self { - self.0 |= (is_emoji_modifier as u32) << Self::EMOJI_MODIFIER_SHIFT; - self.0 |= (is_emoji_modifier_base as u32) << Self::EMOJI_MODIFIER_BASE_SHIFT; - self.0 |= (is_emoji_presentation as u32) << Self::EMOJI_PRESENTATION_SHIFT; - self.0 |= (is_emoji_component as u32) << Self::EMOJI_COMPONENT_SHIFT; - self.0 |= (is_regional_indicator as u32) << Self::REGIONAL_INDICATOR_SHIFT; - self - } - - #[inline] - pub(crate) const fn is_emoji(self) -> bool { - self.0 & Self::EMOJI_MASK != 0 - } - - #[inline] - pub(crate) const fn 
is_emoji_modifier(self) -> bool { - self.0 & Self::EMOJI_MODIFIER_MASK != 0 - } - - #[inline] - pub(crate) const fn is_emoji_modifier_base(self) -> bool { - self.0 & Self::EMOJI_MODIFIER_BASE_MASK != 0 - } - - #[inline] - pub(crate) const fn is_emoji_presentation(self) -> bool { - self.0 & Self::EMOJI_PRESENTATION_MASK != 0 - } - - #[allow(unused)] - #[inline] - pub(crate) const fn is_emoji_component(self) -> bool { - self.0 & Self::EMOJI_COMPONENT_MASK != 0 - } - - #[inline] - pub(crate) const fn is_regional_indicator(self) -> bool { - self.0 & Self::REGIONAL_INDICATOR_MASK != 0 - } -} - -/// Represents the category of an emoji segmentation. -#[repr(u8)] -#[derive(Clone, Copy, Debug, PartialEq)] -pub(crate) enum EmojiSegmentationCategory { - Emoji = 0, - EmojiTextPresentation, - EmojiEmojiPresentation, - EmojiModifierBaseText, - EmojiModifierBaseEmoji, - EmojiModifier, - RegionalIndicator, - KeycapBase, - CombiningEnclosingKeycap, - CombiningEnclosingCircleBackslash, - Zwj, - Vs15, - Vs16, - TagBase, - TagSequence, - TagTerm, - None, -} - -impl EmojiSegmentationCategory { - /// Returns the category of the given codepoint and flags. 
- /// - /// - #[inline] - pub(crate) const fn from_codepoint(cp: u32, flags: EmojiFlags) -> Self { - match cp { - // '0'..'9', '#', '*' - 0x30..=0x39 | 0x23 | 0x2A => Self::KeycapBase, - 0x200D => Self::Zwj, - 0x20E0 => Self::CombiningEnclosingCircleBackslash, - 0x20E3 => Self::CombiningEnclosingKeycap, - 0xFE0E => Self::Vs15, - 0xFE0F => Self::Vs16, - 0x1F3F4 => Self::TagBase, - 0xE0030..=0xE0039 | 0xE0061..=0xE007A => Self::TagSequence, - 0xE007F => Self::TagTerm, - _ => { - if flags.is_emoji_modifier_base() { - if flags.is_emoji_presentation() { - return Self::EmojiModifierBaseEmoji; - } - return Self::EmojiModifierBaseText; - } - - if flags.is_emoji_modifier() { - return Self::EmojiModifier; - } - - if flags.is_regional_indicator() { - return Self::RegionalIndicator; - } - - if flags.is_emoji_presentation() { - return Self::EmojiEmojiPresentation; - } - - if flags.is_emoji() { - if !flags.is_emoji_presentation() { - return Self::EmojiTextPresentation; - } - return Self::Emoji; - } - - Self::None - } - } - } - - const fn eq(self, other: Self) -> bool { - self as u8 == other as u8 - } -} - -/// Used to control the presentation style of an emoji. -#[derive(Clone, Copy, Default, PartialEq, Eq, Debug)] -pub(crate) struct ScannedEmojiPresentation { - pub is_emoji: bool, - pub has_vs: bool, -} - -impl ScannedEmojiPresentation { - /// Returns true if the scanned sequence is an emoji presentation. - pub(crate) const fn is_emoji(self) -> bool { - self.is_emoji - } - - /// Clears the emoji presentation state. - pub(crate) const fn clear(&mut self) { - self.is_emoji = false; - self.has_vs = false; - } -} - -/// Scan the given categories for an emoji presentation sequence. -/// -/// Returns a [`ScannedEmojiPresentation`] indicating whether the sequence is an emoji presentation. 
-pub(crate) const fn scan_emoji_presentation( - categories: &[EmojiSegmentationCategory], -) -> ScannedEmojiPresentation { - let len = categories.len(); - - if len == 0 { - return ScannedEmojiPresentation { - is_emoji: false, - has_vs: false, - }; - } - - let (is_any_emoji, is_emoji_modifier_base, is_emoji_presentation) = - emoji_matches(categories[0]); - - // text_emoji_run_with_vs - let is_text_emoji_presentation_sequence = - is_any_emoji && len >= 2 && categories[1].eq(EmojiSegmentationCategory::Vs15); - if (is_text_emoji_presentation_sequence && len == 2) - || is_text_emoji_keycap_sequence(categories) - { - return ScannedEmojiPresentation { - is_emoji: false, - has_vs: true, - }; - } - - // emoji_run - if is_emoji_presentation && len == 1 { - return ScannedEmojiPresentation { - is_emoji: true, - has_vs: false, - }; - } - - let is_emoji_combining_enclosing_circle_backslash_sequence = is_any_emoji - && len == 2 - && categories[1].eq(EmojiSegmentationCategory::CombiningEnclosingCircleBackslash); - if is_emoji_combining_enclosing_circle_backslash_sequence { - return ScannedEmojiPresentation { - is_emoji: true, - has_vs: false, - }; - } - - if is_emoji_flag_sequence(categories) { - return ScannedEmojiPresentation { - is_emoji: true, - has_vs: false, - }; - } - - if is_emoji_tag_sequence(categories) { - return ScannedEmojiPresentation { - is_emoji: true, - has_vs: false, - }; - } - - // emoji_run_with_vs - let is_emoji_presentation_sequence = - is_any_emoji && len >= 2 && categories[1].eq(EmojiSegmentationCategory::Vs16); - if (is_emoji_presentation_sequence && len == 2) || is_emoji_keycap_sequence(categories) { - return ScannedEmojiPresentation { - is_emoji: true, - has_vs: true, - }; - } - - let is_emoji_modifier_sequence = is_emoji_modifier_base - && len >= 2 - && categories[1].eq(EmojiSegmentationCategory::EmojiModifier); - if is_emoji_modifier_sequence && len == 2 { - return ScannedEmojiPresentation { - is_emoji: true, - has_vs: false, - }; - } - - let cursor = 
if is_emoji_presentation_sequence || is_emoji_modifier_sequence { - 2 - } else if is_any_emoji { - 1 - } else { - len - }; - - // fast path - if cursor == len { - return ScannedEmojiPresentation { - is_emoji: false, - has_vs: is_text_emoji_presentation_sequence, - }; - } - - if is_emoji_zwj_sequence(categories, cursor) { - return ScannedEmojiPresentation { - is_emoji: true, - has_vs: false, - }; - } - - if is_unqualified_keycap_sequence(categories) { - return ScannedEmojiPresentation { - is_emoji: true, - has_vs: false, - }; - } - - ScannedEmojiPresentation { - is_emoji: is_emoji_presentation_sequence || is_emoji_modifier_sequence, - has_vs: false, - } -} - -/// Extracts the emoji category flags from the given category. -/// -/// - `is_any_emoji`: -/// `EmojiTextPresentation` | `EmojiEmojiPresentation` | `KeycapBase` | -/// `EmojiModifierBaseText` | `EmojiModifierBaseEmoji` | `TagBase` | `Emoji` -/// -/// - `is_emoji_modifier_base`: `EmojiModifierBaseText` | `EmojiModifierBaseEmoji` -/// -/// - `is_emoji_presentation`: -/// `EmojiEmojiPresentation` | `TagBase` | `EmojiModifierBaseEmoji` | -/// `EmojiModifier` | `RegionalIndicator` -/// -/// Returns a tuple: `(is_any_emoji, is_emoji_modifier_base, is_emoji_presentation)`. -/// -/// -#[inline] -const fn emoji_matches(category: EmojiSegmentationCategory) -> (bool, bool, bool) { - use EmojiSegmentationCategory::*; - match category { - EmojiTextPresentation | KeycapBase | Emoji => (true, false, false), - EmojiEmojiPresentation | TagBase => (true, false, true), - EmojiModifierBaseText => (true, true, false), - EmojiModifierBaseEmoji => (true, true, true), - EmojiModifier | RegionalIndicator => (false, false, true), - _ => (false, false, false), - } -} - -/// Text emoji keycap sequence. -/// -/// This is a special case of text emoji presentation sequence. 
-#[inline] -const fn is_text_emoji_keycap_sequence(categories: &[EmojiSegmentationCategory]) -> bool { - categories.len() == 3 - && categories[0].eq(EmojiSegmentationCategory::KeycapBase) - && categories[1].eq(EmojiSegmentationCategory::Vs15) - && categories[2].eq(EmojiSegmentationCategory::CombiningEnclosingKeycap) -} - -/// Emoji flag sequence. -/// -/// -#[inline] -const fn is_emoji_flag_sequence(categories: &[EmojiSegmentationCategory]) -> bool { - categories.len() == 2 - && categories[0].eq(EmojiSegmentationCategory::RegionalIndicator) - && categories[1].eq(EmojiSegmentationCategory::RegionalIndicator) -} - -/// Emoji tag sequence (ETS). -/// -/// -#[inline] -const fn is_emoji_tag_sequence(categories: &[EmojiSegmentationCategory]) -> bool { - let is_tag_sequence = categories.len() >= 2 - && categories[0].eq(EmojiSegmentationCategory::TagBase) - && categories[categories.len() - 1].eq(EmojiSegmentationCategory::TagTerm); - - if !is_tag_sequence { - return false; - } - - let mut i = 1; - while i < categories.len() - 1 { - if !categories[i].eq(EmojiSegmentationCategory::TagSequence) { - return false; - } - i += 1; - } - - is_tag_sequence -} - -/// Emoji keycap sequence. -/// -/// -#[inline] -const fn is_emoji_keycap_sequence(categories: &[EmojiSegmentationCategory]) -> bool { - categories.len() == 3 - && categories[0].eq(EmojiSegmentationCategory::KeycapBase) - && categories[1].eq(EmojiSegmentationCategory::Vs16) - && categories[2].eq(EmojiSegmentationCategory::CombiningEnclosingKeycap) -} - -/// Emoji ZWJ sequence. 
-/// -/// -const fn is_emoji_zwj_sequence( - categories: &[EmojiSegmentationCategory], - mut cursor: usize, -) -> bool { - while cursor + 1 < categories.len() && categories[cursor].eq(EmojiSegmentationCategory::Zwj) { - cursor += 1; - - let (is_any_emoji, is_emoji_modifier_base, _) = emoji_matches(categories[cursor]); - - if cursor + 1 < categories.len() { - let is_emoji_presentation_sequence = - is_any_emoji && categories[cursor + 1].eq(EmojiSegmentationCategory::Vs16); - if is_emoji_presentation_sequence { - cursor += 2; - continue; - } - - let is_emoji_modifier_sequence = is_emoji_modifier_base - && categories[cursor + 1].eq(EmojiSegmentationCategory::EmojiModifier); - if is_emoji_modifier_sequence { - cursor += 2; - continue; - } - } - - if is_any_emoji { - cursor += 1; - } - } - - cursor == categories.len() -} - -#[inline] -const fn is_unqualified_keycap_sequence(categories: &[EmojiSegmentationCategory]) -> bool { - categories.len() == 2 - && categories[0].eq(EmojiSegmentationCategory::KeycapBase) - && categories[1].eq(EmojiSegmentationCategory::CombiningEnclosingKeycap) -} +pub(crate) use dfa::EmojiDFA; +pub(crate) use types::{EmojiFlags, EmojiPresentationStyle, EmojiSegmentationCategory}; diff --git a/parley/src/emoji/types.rs b/parley/src/emoji/types.rs new file mode 100644 index 000000000..d7b3994dd --- /dev/null +++ b/parley/src/emoji/types.rs @@ -0,0 +1,277 @@ +/// Flags are used to identify [`EmojiSegmentationCategory`]. 
+#[derive(Clone, Copy, Default)] +pub(crate) struct EmojiFlags(u32); + +impl EmojiFlags { + const EMOJI_SHIFT: u32 = 0; + const EMOJI_MODIFIER_SHIFT: u32 = 1; + const EMOJI_MODIFIER_BASE_SHIFT: u32 = 2; + const EMOJI_PRESENTATION_SHIFT: u32 = 3; + const EMOJI_COMPONENT_SHIFT: u32 = 4; + const REGIONAL_INDICATOR_SHIFT: u32 = 5; + + const EMOJI_MASK: u32 = 1 << Self::EMOJI_SHIFT; + const EMOJI_MODIFIER_MASK: u32 = 1 << Self::EMOJI_MODIFIER_SHIFT; + const EMOJI_MODIFIER_BASE_MASK: u32 = 1 << Self::EMOJI_MODIFIER_BASE_SHIFT; + const EMOJI_PRESENTATION_MASK: u32 = 1 << Self::EMOJI_PRESENTATION_SHIFT; + #[allow(unused)] + const EMOJI_COMPONENT_MASK: u32 = 1 << Self::EMOJI_COMPONENT_SHIFT; + const REGIONAL_INDICATOR_MASK: u32 = 1 << Self::REGIONAL_INDICATOR_SHIFT; + + #[inline] + pub(crate) const fn new() -> Self { + Self(0) + } + + #[inline] + pub(crate) const fn with_emoji(mut self, is_emoji: bool) -> Self { + self.0 |= (is_emoji as u32) << Self::EMOJI_SHIFT; + self + } + + #[inline] + pub(crate) const fn with_extra( + mut self, + is_emoji_modifier: bool, + is_emoji_modifier_base: bool, + is_emoji_presentation: bool, + is_emoji_component: bool, + is_regional_indicator: bool, + ) -> Self { + self.0 |= (is_emoji_modifier as u32) << Self::EMOJI_MODIFIER_SHIFT; + self.0 |= (is_emoji_modifier_base as u32) << Self::EMOJI_MODIFIER_BASE_SHIFT; + self.0 |= (is_emoji_presentation as u32) << Self::EMOJI_PRESENTATION_SHIFT; + self.0 |= (is_emoji_component as u32) << Self::EMOJI_COMPONENT_SHIFT; + self.0 |= (is_regional_indicator as u32) << Self::REGIONAL_INDICATOR_SHIFT; + self + } + + #[inline] + pub(crate) const fn is_emoji(self) -> bool { + self.0 & Self::EMOJI_MASK != 0 + } + + #[inline] + pub(crate) const fn is_emoji_modifier(self) -> bool { + self.0 & Self::EMOJI_MODIFIER_MASK != 0 + } + + #[inline] + pub(crate) const fn is_emoji_modifier_base(self) -> bool { + self.0 & Self::EMOJI_MODIFIER_BASE_MASK != 0 + } + + #[inline] + pub(crate) const fn is_emoji_presentation(self) -> 
bool { + self.0 & Self::EMOJI_PRESENTATION_MASK != 0 + } + + #[allow(unused)] + #[inline] + pub(crate) const fn is_emoji_component(self) -> bool { + self.0 & Self::EMOJI_COMPONENT_MASK != 0 + } + + #[inline] + pub(crate) const fn is_regional_indicator(self) -> bool { + self.0 & Self::REGIONAL_INDICATOR_MASK != 0 + } +} + +#[repr(u8)] +#[derive(Clone, Copy, Debug, PartialEq)] +pub(crate) enum EmojiState { + Reject = 0, + Start, + + Terminal, + Emoji, + #[allow(unused)] + EmojiModifier, + EmojiModifierBaseText, + EmojiModifierBaseEmoji, + OptionalZwj, + KeycapVs, + TagBase, + /// RegionalIndicator + Ri, + + TagSpec, + TagEmpty, + KeycapBase, + Zwj, +} + +impl EmojiState { + pub(crate) const fn as_usize(self) -> usize { + self as usize + } + + pub(crate) const fn as_u8(self) -> u8 { + self as u8 + } + + pub(crate) const fn eq(self, other: Self) -> bool { + self.as_u8() == other.as_u8() + } +} + +impl core::ops::Index for [T] { + type Output = T; + + #[inline] + fn index(&self, index: EmojiState) -> &T { + &self[index.as_usize()] + } +} + +impl core::ops::IndexMut for [T] { + #[inline] + fn index_mut(&mut self, index: EmojiState) -> &mut T { + &mut self[index.as_usize()] + } +} + +/// Represents the category of an emoji segmentation. +#[repr(u8)] +#[derive(Clone, Copy, Debug, PartialEq)] +pub(crate) enum EmojiSegmentationCategory { + Emoji = 0, + EmojiTextPresentation, + EmojiEmojiPresentation, + EmojiModifierBaseText, + EmojiModifierBaseEmoji, + EmojiModifier, + /// RegionalIndicator + Ri, + KeycapBase, + KeycapTerm, + Zwj, + Vs15, + Vs16, + TagBase, + TagSpec, + TagTerm, + None, +} + +impl EmojiSegmentationCategory { + /// Returns the category of the given codepoint and flags. 
+ /// + /// + #[inline] + pub(crate) fn from_codepoint(cp: u32, flags: EmojiFlags) -> Self { + match cp { + // '0'..'9', '#', '*' + 0x30..=0x39 | 0x23 | 0x2A => Self::KeycapBase, + 0x200D => Self::Zwj, + 0x20E3 => Self::KeycapTerm, + 0xFE0E => Self::Vs15, + 0xFE0F => Self::Vs16, + 0x1F3F4 => Self::TagBase, + 0xE0030..=0xE0039 | 0xE0061..=0xE007A => Self::TagSpec, + 0xE007F => Self::TagTerm, + _ => { + if flags.is_emoji_modifier_base() { + if flags.is_emoji_presentation() { + return Self::EmojiModifierBaseEmoji; + } + return Self::EmojiModifierBaseText; + } + + if flags.is_emoji_modifier() { + return Self::EmojiModifier; + } + + if flags.is_regional_indicator() { + return Self::Ri; + } + + if flags.is_emoji_presentation() { + return Self::EmojiEmojiPresentation; + } + + if flags.is_emoji() { + if !flags.is_emoji_presentation() { + return Self::EmojiTextPresentation; + } + return Self::Emoji; + } + + Self::None + } + } + } + + pub(crate) const fn as_usize(self) -> usize { + self as usize + } + + pub(crate) const fn as_u8(self) -> u8 { + self as u8 + } + + pub(crate) const fn eq(self, other: Self) -> bool { + self.as_u8() == other.as_u8() + } +} + +impl core::ops::Index for [T] { + type Output = T; + + #[inline] + fn index(&self, index: EmojiSegmentationCategory) -> &T { + &self[index.as_usize()] + } +} + +impl core::ops::IndexMut for [T] { + #[inline] + fn index_mut(&mut self, index: EmojiSegmentationCategory) -> &mut T { + &mut self[index.as_usize()] + } +} + +#[repr(u8)] +#[derive(Clone, Copy, PartialEq, Debug)] +pub(crate) enum EmojiSequence { + Basic, + Keycap, + Modifier, + Flag, + Zwj, + Tag, +} + +impl EmojiSequence { + pub(crate) const fn as_u8(self) -> u8 { + self as u8 + } + + pub(crate) const fn eq(self, other: Self) -> bool { + self.as_u8() == other.as_u8() + } +} + +#[repr(u8)] +#[derive(Clone, Copy, PartialEq, Default, Debug)] +pub(crate) enum EmojiPresentationStyle { + Emoji, + Text, + #[default] + Default, +} + +impl EmojiPresentationStyle { + 
pub(crate) const fn is_emoji(self) -> bool { + self.eq(Self::Emoji) + } + + pub(crate) const fn as_u8(self) -> u8 { + self as u8 + } + + pub(crate) const fn eq(self, other: Self) -> bool { + self.as_u8() == other.as_u8() + } +} diff --git a/parley/src/shape/mod.rs b/parley/src/shape/mod.rs index bed614cb7..565fcb01c 100644 --- a/parley/src/shape/mod.rs +++ b/parley/src/shape/mod.rs @@ -14,7 +14,7 @@ use super::style::{Brush, FontFeature, FontVariation}; use crate::analysis::cluster::{Char, CharCluster, Status}; use crate::analysis::{AnalysisDataSources, CharInfo}; use crate::convert::script_to_harfrust; -use crate::emoji::{EmojiFlags, EmojiSegmentationCategory, scan_emoji_presentation}; +use crate::emoji::{EmojiDFA, EmojiFlags, EmojiSegmentationCategory}; use crate::inline_box::InlineBox; use crate::lru_cache::LruCache; use crate::util::nearly_eq; @@ -240,7 +240,7 @@ fn fill_cluster_in_place( let start = *code_unit_offset_in_string as u32; let mut is_emoji = false; - let mut emoji_segmentations = Vec::with_capacity(segment_text.char_indices().count()); + let mut emoji_dfa = EmojiDFA::new(); for ((_, ch), (info, style_index)) in segment_text.char_indices().zip(item_infos_iter.by_ref()) { @@ -276,10 +276,10 @@ fn fill_cluster_in_place( ), ); - is_emoji_presentation_selector = category.eq(&EmojiSegmentationCategory::Vs16) - || category.eq(&EmojiSegmentationCategory::Vs15); + is_emoji_presentation_selector = category.eq(EmojiSegmentationCategory::Vs16) + || category.eq(EmojiSegmentationCategory::Vs15); - emoji_segmentations.push(category); + emoji_dfa.step_record(category); } let contributes_to_shaping = info.contributes_to_shaping(); @@ -305,7 +305,7 @@ fn fill_cluster_in_place( char_cluster.force_normalize = force_normalize; if is_emoji { - char_cluster.scanned_emoji_presentation = scan_emoji_presentation(&emoji_segmentations); + char_cluster.emoji_presentation_style = emoji_dfa.presentation_style(); } } @@ -595,7 +595,7 @@ impl<'a, 'b, B: Brush> FontSelector<'a, 'b, 
B> { analysis_data_sources: &AnalysisDataSources, ) -> Option { let style_index = cluster.style_index(); - let is_emoji = cluster.scanned_emoji_presentation.is_emoji(); + let is_emoji = cluster.emoji_presentation_style.is_emoji(); if style_index != self.style_index || is_emoji || self.fonts_id.is_none() { self.style_index = style_index; let style = &self.styles[style_index as usize]; diff --git a/parley/src/tests/test_emoji_segmenters.rs b/parley/src/tests/test_emoji_segmenters.rs index 3d377af55..5f240ce4d 100644 --- a/parley/src/tests/test_emoji_segmenters.rs +++ b/parley/src/tests/test_emoji_segmenters.rs @@ -5,20 +5,18 @@ //! //! [emoji segmenter]: +use alloc::vec::Vec; use core::char; -use std::vec::Vec; use crate::{ analysis::AnalysisDataSources, - emoji::{ - EmojiFlags, EmojiSegmentationCategory, ScannedEmojiPresentation, scan_emoji_presentation, - }, + emoji::{EmojiDFA, EmojiFlags, EmojiPresentationStyle, EmojiSegmentationCategory}, }; struct TestEntity<'a> { sequence: &'a [u32], categories: &'a [EmojiSegmentationCategory], - scanned: ScannedEmojiPresentation, + style: EmojiPresentationStyle, } fn assert_emoji_segmenters_produce_same_result(entity: TestEntity<'_>) { @@ -28,6 +26,8 @@ fn assert_emoji_segmenters_produce_same_result(entity: TestEntity<'_>) { let emoji_component = analysis.emoji_component(); let emoji_presentation = analysis.emoji_presentation(); + let mut emoji_dfa = EmojiDFA::new(); + let result = entity .sequence .iter() @@ -50,13 +50,16 @@ fn assert_emoji_segmenters_produce_same_result(entity: TestEntity<'_>) { is_regional_indicator, ); - EmojiSegmentationCategory::from_codepoint(cp, emoji_flags) + let category = EmojiSegmentationCategory::from_codepoint(cp, emoji_flags); + + emoji_dfa.step_record(category); + + category }) .collect::>(); assert_eq!(result, entity.categories); - - assert_eq!(scan_emoji_presentation(&result), entity.scanned); + assert_eq!(emoji_dfa.presentation_style(), entity.style); } // Emoji presentation default; 
Encoded: 😀 @@ -67,10 +70,7 @@ fn emoji_presentation_default() { 0x1F600, // GRINNING FACE ], categories: &[EmojiSegmentationCategory::EmojiEmojiPresentation], - scanned: ScannedEmojiPresentation { - is_emoji: true, - has_vs: false, - }, + style: EmojiPresentationStyle::Emoji, }); } @@ -82,10 +82,7 @@ fn text_presentation_default() { 0x00A9, // COPYRIGHT SIGN ], categories: &[EmojiSegmentationCategory::EmojiTextPresentation], - scanned: ScannedEmojiPresentation { - is_emoji: false, - has_vs: false, - }, + style: EmojiPresentationStyle::Text, }); } @@ -95,10 +92,7 @@ fn long_keycap_base() { assert_emoji_segmenters_produce_same_result(TestEntity { sequence: &[0x0031], // DIGIT ONE categories: &[EmojiSegmentationCategory::KeycapBase], - scanned: ScannedEmojiPresentation { - is_emoji: false, - has_vs: false, - }, + style: EmojiPresentationStyle::Default, }); } @@ -114,10 +108,7 @@ fn keycap_base_vs15() { EmojiSegmentationCategory::KeycapBase, EmojiSegmentationCategory::Vs15, ], - scanned: ScannedEmojiPresentation { - is_emoji: false, - has_vs: true, - }, + style: EmojiPresentationStyle::Text, }); } @@ -133,10 +124,7 @@ fn keycap_base_vs16() { EmojiSegmentationCategory::KeycapBase, EmojiSegmentationCategory::Vs16, ], - scanned: ScannedEmojiPresentation { - is_emoji: true, - has_vs: true, - }, + style: EmojiPresentationStyle::Emoji, }); } @@ -150,12 +138,9 @@ fn unqualified_keycap() { ], categories: &[ EmojiSegmentationCategory::KeycapBase, - EmojiSegmentationCategory::CombiningEnclosingKeycap, + EmojiSegmentationCategory::KeycapTerm, ], - scanned: ScannedEmojiPresentation { - is_emoji: true, - has_vs: false, - }, + style: EmojiPresentationStyle::Default, }); } @@ -171,12 +156,9 @@ fn keycap_vs15_term() { categories: &[ EmojiSegmentationCategory::KeycapBase, EmojiSegmentationCategory::Vs15, - EmojiSegmentationCategory::CombiningEnclosingKeycap, + EmojiSegmentationCategory::KeycapTerm, ], - scanned: ScannedEmojiPresentation { - is_emoji: false, - has_vs: true, - }, + 
style: EmojiPresentationStyle::Text, }); } @@ -192,12 +174,9 @@ fn qualified_keycap() { categories: &[ EmojiSegmentationCategory::KeycapBase, EmojiSegmentationCategory::Vs16, - EmojiSegmentationCategory::CombiningEnclosingKeycap, + EmojiSegmentationCategory::KeycapTerm, ], - scanned: ScannedEmojiPresentation { - is_emoji: true, - has_vs: true, - }, + style: EmojiPresentationStyle::Emoji, }); } @@ -209,10 +188,7 @@ fn lone_emoji_modifier() { 0x1F3FB, // EMOJI MODIFIER FITZPATRICK TYPE-1-2 ], categories: &[EmojiSegmentationCategory::EmojiModifier], - scanned: ScannedEmojiPresentation { - is_emoji: true, - has_vs: false, - }, + style: EmojiPresentationStyle::Default, }); } @@ -224,10 +200,7 @@ fn bare_modifier_base_text_default() { 0x261D, // WHITE UP POINTING INDEX ], categories: &[EmojiSegmentationCategory::EmojiModifierBaseText], - scanned: ScannedEmojiPresentation { - is_emoji: false, - has_vs: false, - }, + style: EmojiPresentationStyle::Text, }); } @@ -243,10 +216,7 @@ fn modifier_base_text_default_vs16() { EmojiSegmentationCategory::EmojiModifierBaseText, EmojiSegmentationCategory::Vs16, ], - scanned: ScannedEmojiPresentation { - is_emoji: true, - has_vs: true, - }, + style: EmojiPresentationStyle::Emoji, }); } @@ -262,10 +232,7 @@ fn modifier_base_text_default_skin_tone() { EmojiSegmentationCategory::EmojiModifierBaseText, EmojiSegmentationCategory::EmojiModifier, ], - scanned: ScannedEmojiPresentation { - is_emoji: true, - has_vs: false, - }, + style: EmojiPresentationStyle::Emoji, }); } @@ -281,10 +248,7 @@ fn modifier_base_emoji_default_skin_tone() { EmojiSegmentationCategory::EmojiModifierBaseEmoji, EmojiSegmentationCategory::EmojiModifier, ], - scanned: ScannedEmojiPresentation { - is_emoji: true, - has_vs: false, - }, + style: EmojiPresentationStyle::Emoji, }); } @@ -295,11 +259,8 @@ fn lone_regional_indicator() { sequence: &[ 0x1F1FA, // REGIONAL INDICATOR SYMBOL LETTER U ], - categories: &[EmojiSegmentationCategory::RegionalIndicator], - scanned: 
ScannedEmojiPresentation { - is_emoji: true, - has_vs: false, - }, + categories: &[EmojiSegmentationCategory::Ri], + style: EmojiPresentationStyle::Default, }); } @@ -311,14 +272,8 @@ fn flag_sequence_us() { 0x1F1FA, // REGIONAL INDICATOR SYMBOL LETTER U 0x1F1F8, // REGIONAL INDICATOR SYMBOL LETTER S ], - categories: &[ - EmojiSegmentationCategory::RegionalIndicator, - EmojiSegmentationCategory::RegionalIndicator, - ], - scanned: ScannedEmojiPresentation { - is_emoji: true, - has_vs: false, - }, + categories: &[EmojiSegmentationCategory::Ri, EmojiSegmentationCategory::Ri], + style: EmojiPresentationStyle::Emoji, }); } @@ -337,14 +292,11 @@ fn double_lone_regional_indicator_flag_sequence_us() { 0x1F1F8, // REGIONAL INDICATOR SYMBOL LETTER S ], categories: &[ - EmojiSegmentationCategory::RegionalIndicator, - EmojiSegmentationCategory::RegionalIndicator, - EmojiSegmentationCategory::RegionalIndicator, + EmojiSegmentationCategory::Ri, + EmojiSegmentationCategory::Ri, + EmojiSegmentationCategory::Ri, ], - scanned: ScannedEmojiPresentation { - is_emoji: true, - has_vs: false, - }, + style: EmojiPresentationStyle::Emoji, }); } @@ -360,10 +312,7 @@ fn text_default_emoji_vs15() { EmojiSegmentationCategory::EmojiTextPresentation, EmojiSegmentationCategory::Vs15, ], - scanned: ScannedEmojiPresentation { - is_emoji: false, - has_vs: true, - }, + style: EmojiPresentationStyle::Text, }); } @@ -379,10 +328,7 @@ fn text_default_emoji_vs16() { EmojiSegmentationCategory::EmojiTextPresentation, EmojiSegmentationCategory::Vs16, ], - scanned: ScannedEmojiPresentation { - is_emoji: true, - has_vs: true, - }, + style: EmojiPresentationStyle::Emoji, }); } @@ -398,10 +344,7 @@ fn emoji_default_emoji_vs15() { EmojiSegmentationCategory::EmojiEmojiPresentation, EmojiSegmentationCategory::Vs15, ], - scanned: ScannedEmojiPresentation { - is_emoji: false, - has_vs: true, - }, + style: EmojiPresentationStyle::Text, }); } @@ -417,10 +360,7 @@ fn emoji_default_emoji_vs16() { 
EmojiSegmentationCategory::EmojiEmojiPresentation, EmojiSegmentationCategory::Vs16, ], - scanned: ScannedEmojiPresentation { - is_emoji: true, - has_vs: true, - }, + style: EmojiPresentationStyle::Emoji, }); } @@ -442,10 +382,7 @@ fn zwj_family() { EmojiSegmentationCategory::Zwj, EmojiSegmentationCategory::EmojiModifierBaseEmoji, ], - scanned: ScannedEmojiPresentation { - is_emoji: true, - has_vs: false, - }, + style: EmojiPresentationStyle::Emoji, }); } @@ -471,10 +408,7 @@ fn long_zwj_family() { EmojiSegmentationCategory::Zwj, EmojiSegmentationCategory::EmojiModifierBaseEmoji, ], - scanned: ScannedEmojiPresentation { - is_emoji: true, - has_vs: false, - }, + style: EmojiPresentationStyle::Emoji, }); } @@ -496,10 +430,7 @@ fn zwj_couple() { EmojiSegmentationCategory::Zwj, EmojiSegmentationCategory::EmojiModifierBaseEmoji, ], - scanned: ScannedEmojiPresentation { - is_emoji: true, - has_vs: false, - }, + style: EmojiPresentationStyle::Emoji, }); } @@ -519,10 +450,7 @@ fn zwj_with_vs16_element() { EmojiSegmentationCategory::Zwj, EmojiSegmentationCategory::EmojiModifierBaseEmoji, ], - scanned: ScannedEmojiPresentation { - is_emoji: true, - has_vs: false, - }, + style: EmojiPresentationStyle::Emoji, }); } @@ -544,10 +472,7 @@ fn zwj_with_vs16_on_both_elements() { EmojiSegmentationCategory::EmojiModifierBaseEmoji, EmojiSegmentationCategory::Vs16, ], - scanned: ScannedEmojiPresentation { - is_emoji: true, - has_vs: false, - }, + style: EmojiPresentationStyle::Emoji, }); } @@ -567,10 +492,7 @@ fn zwj_after_modifier_sequence() { EmojiSegmentationCategory::Zwj, EmojiSegmentationCategory::EmojiEmojiPresentation, ], - scanned: ScannedEmojiPresentation { - is_emoji: true, - has_vs: false, - }, + style: EmojiPresentationStyle::Emoji, }); } @@ -590,10 +512,7 @@ fn zwj_technologist_with_skin_tone() { EmojiSegmentationCategory::Zwj, EmojiSegmentationCategory::EmojiEmojiPresentation, ], - scanned: ScannedEmojiPresentation { - is_emoji: true, - has_vs: false, - }, + style: 
EmojiPresentationStyle::Emoji, }); } @@ -613,10 +532,7 @@ fn vs16_enables_zwj_continuation() { EmojiSegmentationCategory::Zwj, EmojiSegmentationCategory::EmojiModifierBaseEmoji, ], - scanned: ScannedEmojiPresentation { - is_emoji: true, - has_vs: false, - }, + style: EmojiPresentationStyle::Emoji, }); } @@ -635,17 +551,14 @@ fn tag_sequence_england() { ], categories: &[ EmojiSegmentationCategory::TagBase, - EmojiSegmentationCategory::TagSequence, - EmojiSegmentationCategory::TagSequence, - EmojiSegmentationCategory::TagSequence, - EmojiSegmentationCategory::TagSequence, - EmojiSegmentationCategory::TagSequence, + EmojiSegmentationCategory::TagSpec, + EmojiSegmentationCategory::TagSpec, + EmojiSegmentationCategory::TagSpec, + EmojiSegmentationCategory::TagSpec, + EmojiSegmentationCategory::TagSpec, EmojiSegmentationCategory::TagTerm, ], - scanned: ScannedEmojiPresentation { - is_emoji: true, - has_vs: false, - }, + style: EmojiPresentationStyle::Emoji, }); } @@ -663,10 +576,7 @@ fn tag_base_as_zwj_element() { EmojiSegmentationCategory::Zwj, EmojiSegmentationCategory::EmojiEmojiPresentation, ], - scanned: ScannedEmojiPresentation { - is_emoji: true, - has_vs: false, - }, + style: EmojiPresentationStyle::Emoji, }); } @@ -686,10 +596,7 @@ fn tag_base_vs16_as_zwj() { EmojiSegmentationCategory::Zwj, EmojiSegmentationCategory::EmojiEmojiPresentation, ], - scanned: ScannedEmojiPresentation { - is_emoji: true, - has_vs: false, - }, + style: EmojiPresentationStyle::Emoji, }); } @@ -705,10 +612,7 @@ fn tag_base_vs15() { EmojiSegmentationCategory::TagBase, EmojiSegmentationCategory::Vs15, ], - scanned: ScannedEmojiPresentation { - is_emoji: false, - has_vs: true, - }, + style: EmojiPresentationStyle::Text, }); } @@ -724,9 +628,6 @@ fn tag_base_vs16() { EmojiSegmentationCategory::TagBase, EmojiSegmentationCategory::Vs16, ], - scanned: ScannedEmojiPresentation { - is_emoji: true, - has_vs: true, - }, + style: EmojiPresentationStyle::Emoji, }); } From 
bb16acba7a070e7fc4c243471fcb6abb67337cbc Mon Sep 17 00:00:00 2001 From: Fangdun Tsai Date: Tue, 12 May 2026 20:15:07 +0800 Subject: [PATCH 29/34] add inline --- parley/src/emoji/dfa.rs | 13 +++++++++++-- parley/src/emoji/types.rs | 11 +++++++++++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/parley/src/emoji/dfa.rs b/parley/src/emoji/dfa.rs index edd860f0c..3008c35e1 100644 --- a/parley/src/emoji/dfa.rs +++ b/parley/src/emoji/dfa.rs @@ -136,16 +136,18 @@ impl EmojiDFA { recorded: [0, 0], }; + #[inline] pub(crate) const fn new() -> Self { Self::DEFAULT } + #[inline] pub(crate) const fn step(&mut self, category: EmojiSegmentationCategory) { self.state = DFA_TRANS[self.state.as_usize()][category.as_usize()]; } - // pub(crate) const fn step_record(&mut self, category: EmojiSegmentationCategory) { - pub(crate) fn step_record(&mut self, category: EmojiSegmentationCategory) { + #[inline] + pub(crate) const fn step_record(&mut self, category: EmojiSegmentationCategory) { self.step(category); if self.is_rejected() || self.is_started() { @@ -156,15 +158,18 @@ impl EmojiDFA { self.recorded[1] |= 1 << category.as_u8(); } + #[inline] pub(crate) const fn is_rejected(&self) -> bool { self.state.eq(EmojiState::Reject) } + #[inline] pub(crate) const fn is_started(&self) -> bool { self.state.eq(EmojiState::Start) } #[allow(unused)] + #[inline] pub(crate) const fn is_accepting(&self) -> bool { const START: u8 = EmojiState::Terminal.as_u8(); const END: u8 = EmojiState::Ri.as_u8(); @@ -174,14 +179,17 @@ impl EmojiDFA { START <= cur && cur <= END } + #[inline] pub(crate) const fn contains_state(&self, state: EmojiState) -> bool { self.recorded[0] & (1 << state.as_u8()) != 0 } + #[inline] pub(crate) const fn contains_category(&self, category: EmojiSegmentationCategory) -> bool { self.recorded[1] & (1 << category.as_u8()) != 0 } + #[inline] pub(crate) const fn sequence(&self) -> EmojiSequence { if self.contains_category(EmojiSegmentationCategory::Zwj) { return 
EmojiSequence::Zwj; @@ -221,6 +229,7 @@ impl EmojiDFA { EmojiSequence::Basic } + #[inline] pub(crate) const fn presentation_style(&self) -> EmojiPresentationStyle { if self.contains_category(EmojiSegmentationCategory::Vs15) { return EmojiPresentationStyle::Text; diff --git a/parley/src/emoji/types.rs b/parley/src/emoji/types.rs index d7b3994dd..405807e58 100644 --- a/parley/src/emoji/types.rs +++ b/parley/src/emoji/types.rs @@ -103,14 +103,17 @@ pub(crate) enum EmojiState { } impl EmojiState { + #[inline] pub(crate) const fn as_usize(self) -> usize { self as usize } + #[inline] pub(crate) const fn as_u8(self) -> u8 { self as u8 } + #[inline] pub(crate) const fn eq(self, other: Self) -> bool { self.as_u8() == other.as_u8() } @@ -203,14 +206,17 @@ impl EmojiSegmentationCategory { } } + #[inline] pub(crate) const fn as_usize(self) -> usize { self as usize } + #[inline] pub(crate) const fn as_u8(self) -> u8 { self as u8 } + #[inline] pub(crate) const fn eq(self, other: Self) -> bool { self.as_u8() == other.as_u8() } @@ -244,10 +250,12 @@ pub(crate) enum EmojiSequence { } impl EmojiSequence { + #[inline] pub(crate) const fn as_u8(self) -> u8 { self as u8 } + #[inline] pub(crate) const fn eq(self, other: Self) -> bool { self.as_u8() == other.as_u8() } @@ -263,14 +271,17 @@ pub(crate) enum EmojiPresentationStyle { } impl EmojiPresentationStyle { + #[inline] pub(crate) const fn is_emoji(self) -> bool { self.eq(Self::Emoji) } + #[inline] pub(crate) const fn as_u8(self) -> u8 { self as u8 } + #[inline] pub(crate) const fn eq(self, other: Self) -> bool { self.as_u8() == other.as_u8() } From a968862d4c378d39ed262af5af225bc13bc9f6a2 Mon Sep 17 00:00:00 2001 From: Fangdun Tsai Date: Tue, 12 May 2026 20:19:48 +0800 Subject: [PATCH 30/34] add copyright --- parley/src/emoji/dfa.rs | 5 +++-- parley/src/emoji/types.rs | 3 +++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/parley/src/emoji/dfa.rs b/parley/src/emoji/dfa.rs index 3008c35e1..36814307e 100644 --- 
a/parley/src/emoji/dfa.rs +++ b/parley/src/emoji/dfa.rs @@ -1,6 +1,7 @@ -use crate::emoji::types::EmojiSequence; +// Copyright 2026 the Parley Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT -use super::types::{EmojiPresentationStyle, EmojiSegmentationCategory, EmojiState}; +use super::types::{EmojiPresentationStyle, EmojiSegmentationCategory, EmojiSequence, EmojiState}; /// The transition table for Emoji DFA. /// diff --git a/parley/src/emoji/types.rs b/parley/src/emoji/types.rs index 405807e58..9a8575361 100644 --- a/parley/src/emoji/types.rs +++ b/parley/src/emoji/types.rs @@ -1,3 +1,6 @@ +// Copyright 2026 the Parley Authors +// SPDX-License-Identifier: Apache-2.0 OR MIT + /// Flags are used to identify [`EmojiSegmentationCategory`]. #[derive(Clone, Copy, Default)] pub(crate) struct EmojiFlags(u32); From f4173b073a38aed1328676b94bf988c966d57816 Mon Sep 17 00:00:00 2001 From: Fangdun Tsai Date: Tue, 12 May 2026 20:29:07 +0800 Subject: [PATCH 31/34] clippy --- parley/src/emoji/dfa.rs | 14 +++++++------- parley/src/emoji/types.rs | 4 ++-- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/parley/src/emoji/dfa.rs b/parley/src/emoji/dfa.rs index 36814307e..169d36b20 100644 --- a/parley/src/emoji/dfa.rs +++ b/parley/src/emoji/dfa.rs @@ -160,18 +160,18 @@ impl EmojiDFA { } #[inline] - pub(crate) const fn is_rejected(&self) -> bool { + pub(crate) const fn is_rejected(self) -> bool { self.state.eq(EmojiState::Reject) } #[inline] - pub(crate) const fn is_started(&self) -> bool { + pub(crate) const fn is_started(self) -> bool { self.state.eq(EmojiState::Start) } #[allow(unused)] #[inline] - pub(crate) const fn is_accepting(&self) -> bool { + pub(crate) const fn is_accepting(self) -> bool { const START: u8 = EmojiState::Terminal.as_u8(); const END: u8 = EmojiState::Ri.as_u8(); @@ -181,17 +181,17 @@ impl EmojiDFA { } #[inline] - pub(crate) const fn contains_state(&self, state: EmojiState) -> bool { + pub(crate) const fn contains_state(self, state: 
EmojiState) -> bool { self.recorded[0] & (1 << state.as_u8()) != 0 } #[inline] - pub(crate) const fn contains_category(&self, category: EmojiSegmentationCategory) -> bool { + pub(crate) const fn contains_category(self, category: EmojiSegmentationCategory) -> bool { self.recorded[1] & (1 << category.as_u8()) != 0 } #[inline] - pub(crate) const fn sequence(&self) -> EmojiSequence { + pub(crate) const fn sequence(self) -> EmojiSequence { if self.contains_category(EmojiSegmentationCategory::Zwj) { return EmojiSequence::Zwj; } @@ -231,7 +231,7 @@ impl EmojiDFA { } #[inline] - pub(crate) const fn presentation_style(&self) -> EmojiPresentationStyle { + pub(crate) const fn presentation_style(self) -> EmojiPresentationStyle { if self.contains_category(EmojiSegmentationCategory::Vs15) { return EmojiPresentationStyle::Text; } diff --git a/parley/src/emoji/types.rs b/parley/src/emoji/types.rs index 9a8575361..05eaa580f 100644 --- a/parley/src/emoji/types.rs +++ b/parley/src/emoji/types.rs @@ -96,7 +96,7 @@ pub(crate) enum EmojiState { OptionalZwj, KeycapVs, TagBase, - /// RegionalIndicator + /// `RegionalIndicator` Ri, TagSpec, @@ -148,7 +148,7 @@ pub(crate) enum EmojiSegmentationCategory { EmojiModifierBaseText, EmojiModifierBaseEmoji, EmojiModifier, - /// RegionalIndicator + /// `RegionalIndicator` Ri, KeycapBase, KeycapTerm, From 35dda21687be1a15d5325ab5c6cad0c845b1aa6e Mon Sep 17 00:00:00 2001 From: Fangdun Tsai Date: Wed, 13 May 2026 14:24:23 +0800 Subject: [PATCH 32/34] add state macro --- parley/src/emoji/dfa.rs | 130 +++++++++++++++++++++++++--------------- 1 file changed, 82 insertions(+), 48 deletions(-) diff --git a/parley/src/emoji/dfa.rs b/parley/src/emoji/dfa.rs index 169d36b20..9f9368aae 100644 --- a/parley/src/emoji/dfa.rs +++ b/parley/src/emoji/dfa.rs @@ -12,35 +12,42 @@ static DFA_TRANS: [[EmojiState; 16]; 15] = { let mut t = [[State::Reject; 16]; 15]; + // Add a state transition to the DFA transition table. + macro_rules! 
add { + ($state:expr, $category:expr, $next_state:expr) => { + t[$state.as_usize()][$category.as_usize()] = $next_state + }; + } + { - t[State::Start.as_usize()][Category::None.as_usize()] = State::Start; - t[State::Start.as_usize()][Category::KeycapTerm.as_usize()] = State::Start; - t[State::Start.as_usize()][Category::Zwj.as_usize()] = State::Start; - t[State::Start.as_usize()][Category::Vs15.as_usize()] = State::Start; - t[State::Start.as_usize()][Category::Vs16.as_usize()] = State::Start; - t[State::Start.as_usize()][Category::TagSpec.as_usize()] = State::Start; - t[State::Start.as_usize()][Category::TagTerm.as_usize()] = State::Start; + add!(State::Start, Category::None, State::Start); + add!(State::Start, Category::KeycapTerm, State::Start); + add!(State::Start, Category::Zwj, State::Start); + add!(State::Start, Category::Vs15, State::Start); + add!(State::Start, Category::Vs16, State::Start); + add!(State::Start, Category::TagSpec, State::Start); + add!(State::Start, Category::TagTerm, State::Start); } // Text and Emoji presentation sequences { - t[State::Start.as_usize()][Category::Emoji.as_usize()] = State::Emoji; + add!(State::Start, Category::Emoji, State::Emoji); - t[State::Start.as_usize()][Category::EmojiTextPresentation.as_usize()] = State::Emoji; - t[State::Start.as_usize()][Category::EmojiEmojiPresentation.as_usize()] = State::Emoji; + add!(State::Start, Category::EmojiTextPresentation, State::Emoji); + add!(State::Start, Category::EmojiEmojiPresentation, State::Emoji); // Text presentation sequence // // - t[State::Emoji.as_usize()][Category::Vs15.as_usize()] = State::Terminal; + add!(State::Emoji, Category::Vs15, State::Terminal); // Emoji presentation sequence // // - t[State::Emoji.as_usize()][Category::Vs16.as_usize()] = State::OptionalZwj; + add!(State::Emoji, Category::Vs16, State::OptionalZwj); // ZWJ - t[State::Emoji.as_usize()][Category::Zwj.as_usize()] = State::Zwj; + add!(State::Emoji, Category::Zwj, State::Zwj); } // Emoji modifier 
sequence @@ -48,77 +55,104 @@ static DFA_TRANS: [[EmojiState; 16]; 15] = { // { // text - t[State::Start.as_usize()][Category::EmojiModifierBaseText.as_usize()] = - State::EmojiModifierBaseText; - - t[State::EmojiModifierBaseText.as_usize()][Category::Vs15.as_usize()] = State::Terminal; - t[State::EmojiModifierBaseText.as_usize()][Category::Vs16.as_usize()] = State::Terminal; - t[State::EmojiModifierBaseText.as_usize()][Category::EmojiModifier.as_usize()] = - State::OptionalZwj; + add!( + State::Start, + Category::EmojiModifierBaseText, + State::EmojiModifierBaseText + ); + + add!( + State::EmojiModifierBaseText, + Category::Vs15, + State::Terminal + ); + add!( + State::EmojiModifierBaseText, + Category::Vs16, + State::Terminal + ); + add!( + State::EmojiModifierBaseText, + Category::EmojiModifier, + State::OptionalZwj + ); // emoji - t[State::Start.as_usize()][Category::EmojiModifierBaseEmoji.as_usize()] = - State::EmojiModifierBaseEmoji; - - t[State::EmojiModifierBaseEmoji.as_usize()][Category::Vs16.as_usize()] = State::OptionalZwj; - t[State::EmojiModifierBaseEmoji.as_usize()][Category::Zwj.as_usize()] = State::Zwj; - t[State::EmojiModifierBaseEmoji.as_usize()][Category::EmojiModifier.as_usize()] = - State::OptionalZwj; + add!( + State::Start, + Category::EmojiModifierBaseEmoji, + State::EmojiModifierBaseEmoji + ); + + add!( + State::EmojiModifierBaseEmoji, + Category::Vs16, + State::OptionalZwj + ); + add!(State::EmojiModifierBaseEmoji, Category::Zwj, State::Zwj); + add!( + State::EmojiModifierBaseEmoji, + Category::EmojiModifier, + State::OptionalZwj + ); // other - t[State::Start.as_usize()][Category::EmojiModifier.as_usize()] = State::Terminal; + add!(State::Start, Category::EmojiModifier, State::Terminal); } // Emoji flag sequence -- A sequence of two Regional Indicator characters. 
// // { - t[State::Start.as_usize()][Category::Ri.as_usize()] = State::Ri; + add!(State::Start, Category::Ri, State::Ri); - t[State::Ri.as_usize()][Category::Ri.as_usize()] = State::Terminal; + add!(State::Ri, Category::Ri, State::Terminal); } // Emoji tag sequence (ETS). // // { - t[State::Start.as_usize()][Category::TagBase.as_usize()] = State::TagBase; + add!(State::Start, Category::TagBase, State::TagBase); - t[State::TagBase.as_usize()][Category::Vs15.as_usize()] = State::Terminal; - t[State::TagBase.as_usize()][Category::Vs16.as_usize()] = State::OptionalZwj; - t[State::TagBase.as_usize()][Category::TagSpec.as_usize()] = State::TagSpec; - t[State::TagBase.as_usize()][Category::TagTerm.as_usize()] = State::TagEmpty; // without any `TagSpec` - t[State::TagBase.as_usize()][Category::Zwj.as_usize()] = State::Zwj; + add!(State::TagBase, Category::Vs15, State::Terminal); + add!(State::TagBase, Category::Vs16, State::OptionalZwj); + add!(State::TagBase, Category::TagSpec, State::TagSpec); + add!(State::TagBase, Category::TagTerm, State::TagEmpty); // without any `TagSpec` + add!(State::TagBase, Category::Zwj, State::Zwj); // (seq)+ - t[State::TagSpec.as_usize()][Category::TagSpec.as_usize()] = State::TagSpec; - t[State::TagSpec.as_usize()][Category::TagTerm.as_usize()] = State::Terminal; + add!(State::TagSpec, Category::TagSpec, State::TagSpec); + add!(State::TagSpec, Category::TagTerm, State::Terminal); } // Emoji keycap sequence. 
// // { - t[State::Start.as_usize()][Category::KeycapBase.as_usize()] = State::KeycapBase; + add!(State::Start, Category::KeycapBase, State::KeycapBase); - t[State::KeycapBase.as_usize()][Category::KeycapTerm.as_usize()] = State::Terminal; - t[State::KeycapBase.as_usize()][Category::Vs15.as_usize()] = State::KeycapVs; - t[State::KeycapBase.as_usize()][Category::Vs16.as_usize()] = State::KeycapVs; + add!(State::KeycapBase, Category::KeycapTerm, State::Terminal); + add!(State::KeycapBase, Category::Vs15, State::KeycapVs); + add!(State::KeycapBase, Category::Vs16, State::KeycapVs); - t[State::KeycapVs.as_usize()][Category::KeycapTerm.as_usize()] = State::Terminal; + add!(State::KeycapVs, Category::KeycapTerm, State::Terminal); } // Emoji ZWJ sequence. // // { - t[State::OptionalZwj.as_usize()][Category::Zwj.as_usize()] = State::Zwj; + add!(State::OptionalZwj, Category::Zwj, State::Zwj); // (zwj emoji_zwj_element)+ - t[State::Zwj.as_usize()][Category::Emoji.as_usize()] = State::Emoji; - t[State::Zwj.as_usize()][Category::EmojiEmojiPresentation.as_usize()] = State::Emoji; - t[State::Zwj.as_usize()][Category::EmojiModifierBaseEmoji.as_usize()] = - State::EmojiModifierBaseEmoji; + add!(State::Zwj, Category::Emoji, State::Emoji); + add!(State::Zwj, Category::EmojiEmojiPresentation, State::Emoji); + add!( + State::Zwj, + Category::EmojiModifierBaseEmoji, + State::EmojiModifierBaseEmoji + ); } t From 25774db4e289219e0f9710890ce5c55998feba7f Mon Sep 17 00:00:00 2001 From: Fangdun Tsai Date: Thu, 14 May 2026 14:01:10 +0800 Subject: [PATCH 33/34] delete redundant types --- parley/src/analysis/mod.rs | 9 +- parley/src/emoji/dfa.rs | 110 +++++++------------ parley/src/emoji/types.rs | 123 ++++++++++------------ parley/src/shape/mod.rs | 7 +- parley/src/tests/test_emoji_segmenters.rs | 88 ++++++++-------- 5 files changed, 139 insertions(+), 198 deletions(-) diff --git a/parley/src/analysis/mod.rs b/parley/src/analysis/mod.rs index f165fac1c..4974dd782 100644 --- 
a/parley/src/analysis/mod.rs +++ b/parley/src/analysis/mod.rs @@ -14,8 +14,8 @@ use icu_normalizer::properties::{ CanonicalDecompositionBorrowed, }; use icu_properties::props::{ - BidiMirroringGlyph, EmojiComponent, EmojiModifier, EmojiModifierBase, EmojiPresentation, - GeneralCategory, GraphemeClusterBreak, Script, + BidiMirroringGlyph, EmojiModifier, EmojiModifierBase, EmojiPresentation, GeneralCategory, + GraphemeClusterBreak, Script, }; use icu_properties::{ CodePointMapData, CodePointMapDataBorrowed, CodePointSetData, CodePointSetDataBorrowed, @@ -107,11 +107,6 @@ impl AnalysisDataSources { const { CodePointSetData::new::() } } - #[inline(always)] - pub(crate) fn emoji_component(&self) -> CodePointSetDataBorrowed<'_> { - const { CodePointSetData::new::() } - } - #[inline(always)] pub(crate) fn emoji_presentation(&self) -> CodePointSetDataBorrowed<'_> { const { CodePointSetData::new::() } diff --git a/parley/src/emoji/dfa.rs b/parley/src/emoji/dfa.rs index 9f9368aae..fe8be1846 100644 --- a/parley/src/emoji/dfa.rs +++ b/parley/src/emoji/dfa.rs @@ -6,35 +6,24 @@ use super::types::{EmojiPresentationStyle, EmojiSegmentationCategory, EmojiSeque /// The transition table for Emoji DFA. /// /// -static DFA_TRANS: [[EmojiState; 16]; 15] = { +static DFA_TRANS: [[u8; 13]; 14] = { use EmojiSegmentationCategory as Category; use EmojiState as State; - let mut t = [[State::Reject; 16]; 15]; + let mut t = [[0; 13]; 14]; - // Add a state transition to the DFA transition table. + /// Add a state transition to the DFA transition table. macro_rules! 
add { ($state:expr, $category:expr, $next_state:expr) => { - t[$state.as_usize()][$category.as_usize()] = $next_state + t[$state.as_usize()][$category.as_usize()] = $next_state.as_u8() }; } - { - add!(State::Start, Category::None, State::Start); - add!(State::Start, Category::KeycapTerm, State::Start); - add!(State::Start, Category::Zwj, State::Start); - add!(State::Start, Category::Vs15, State::Start); - add!(State::Start, Category::Vs16, State::Start); - add!(State::Start, Category::TagSpec, State::Start); - add!(State::Start, Category::TagTerm, State::Start); - } - // Text and Emoji presentation sequences { add!(State::Start, Category::Emoji, State::Emoji); - add!(State::Start, Category::EmojiTextPresentation, State::Emoji); - add!(State::Start, Category::EmojiEmojiPresentation, State::Emoji); + add!(State::Start, Category::EmojiPresentation, State::Emoji); // Text presentation sequence // @@ -54,44 +43,16 @@ static DFA_TRANS: [[EmojiState; 16]; 15] = { // // { - // text - add!( - State::Start, - Category::EmojiModifierBaseText, - State::EmojiModifierBaseText - ); - - add!( - State::EmojiModifierBaseText, - Category::Vs15, - State::Terminal - ); - add!( - State::EmojiModifierBaseText, - Category::Vs16, - State::Terminal - ); - add!( - State::EmojiModifierBaseText, - Category::EmojiModifier, - State::OptionalZwj - ); - - // emoji add!( State::Start, - Category::EmojiModifierBaseEmoji, - State::EmojiModifierBaseEmoji + Category::EmojiModifierBase, + State::EmojiModifierBase ); + add!(State::EmojiModifierBase, Category::Vs16, State::OptionalZwj); + add!(State::EmojiModifierBase, Category::Zwj, State::Zwj); add!( - State::EmojiModifierBaseEmoji, - Category::Vs16, - State::OptionalZwj - ); - add!(State::EmojiModifierBaseEmoji, Category::Zwj, State::Zwj); - add!( - State::EmojiModifierBaseEmoji, + State::EmojiModifierBase, Category::EmojiModifier, State::OptionalZwj ); @@ -118,12 +79,12 @@ static DFA_TRANS: [[EmojiState; 16]; 15] = { add!(State::TagBase, 
Category::Vs15, State::Terminal); add!(State::TagBase, Category::Vs16, State::OptionalZwj); add!(State::TagBase, Category::TagSpec, State::TagSpec); - add!(State::TagBase, Category::TagTerm, State::TagEmpty); // without any `TagSpec` + add!(State::TagBase, Category::TagEnd, State::TagEmpty); // without any `TagSpec` add!(State::TagBase, Category::Zwj, State::Zwj); // (seq)+ add!(State::TagSpec, Category::TagSpec, State::TagSpec); - add!(State::TagSpec, Category::TagTerm, State::Terminal); + add!(State::TagSpec, Category::TagEnd, State::Terminal); } // Emoji keycap sequence. @@ -132,11 +93,11 @@ static DFA_TRANS: [[EmojiState; 16]; 15] = { { add!(State::Start, Category::KeycapBase, State::KeycapBase); - add!(State::KeycapBase, Category::KeycapTerm, State::Terminal); + add!(State::KeycapBase, Category::KeycapEnd, State::Terminal); add!(State::KeycapBase, Category::Vs15, State::KeycapVs); add!(State::KeycapBase, Category::Vs16, State::KeycapVs); - add!(State::KeycapVs, Category::KeycapTerm, State::Terminal); + add!(State::KeycapVs, Category::KeycapEnd, State::Terminal); } // Emoji ZWJ sequence. 
@@ -147,11 +108,11 @@ static DFA_TRANS: [[EmojiState; 16]; 15] = { // (zwj emoji_zwj_element)+ add!(State::Zwj, Category::Emoji, State::Emoji); - add!(State::Zwj, Category::EmojiEmojiPresentation, State::Emoji); + add!(State::Zwj, Category::EmojiPresentation, State::Emoji); add!( State::Zwj, - Category::EmojiModifierBaseEmoji, - State::EmojiModifierBaseEmoji + Category::EmojiModifierBase, + State::EmojiModifierBase ); } @@ -161,14 +122,14 @@ static DFA_TRANS: [[EmojiState; 16]; 15] = { #[derive(Clone, Copy, Debug)] pub(crate) struct EmojiDFA { state: EmojiState, - // [state, category] - recorded: [u16; 2], + // (state, category) + recorded: (u16, u16), } impl EmojiDFA { const DEFAULT: Self = Self { state: EmojiState::Start, - recorded: [0, 0], + recorded: (0, 0), }; #[inline] @@ -178,7 +139,7 @@ impl EmojiDFA { #[inline] pub(crate) const fn step(&mut self, category: EmojiSegmentationCategory) { - self.state = DFA_TRANS[self.state.as_usize()][category.as_usize()]; + self.state = EmojiState::from_u8(DFA_TRANS[self.state.as_usize()][category.as_usize()]); } #[inline] @@ -189,8 +150,8 @@ impl EmojiDFA { return; } - self.recorded[0] |= 1 << self.state.as_u8(); - self.recorded[1] |= 1 << category.as_u8(); + self.recorded.0 |= 1 << self.state.as_u8(); + self.recorded.1 |= 1 << category.as_u8(); } #[inline] @@ -216,12 +177,12 @@ impl EmojiDFA { #[inline] pub(crate) const fn contains_state(self, state: EmojiState) -> bool { - self.recorded[0] & (1 << state.as_u8()) != 0 + self.recorded.0 & (1 << state.as_u8()) != 0 } #[inline] pub(crate) const fn contains_category(self, category: EmojiSegmentationCategory) -> bool { - self.recorded[1] & (1 << category.as_u8()) != 0 + self.recorded.1 & (1 << category.as_u8()) != 0 } #[inline] @@ -241,8 +202,7 @@ impl EmojiDFA { return EmojiSequence::Flag; } - if (self.contains_category(EmojiSegmentationCategory::EmojiModifierBaseEmoji) - || self.contains_category(EmojiSegmentationCategory::EmojiModifierBaseText)) + if 
self.contains_category(EmojiSegmentationCategory::EmojiModifierBase) && self.contains_category(EmojiSegmentationCategory::EmojiModifier) { return EmojiSequence::Modifier; @@ -250,12 +210,12 @@ impl EmojiDFA { if self.contains_category(EmojiSegmentationCategory::KeycapBase) && self.contains_category(EmojiSegmentationCategory::Vs16) - && self.contains_category(EmojiSegmentationCategory::KeycapTerm) + && self.contains_category(EmojiSegmentationCategory::KeycapEnd) { return EmojiSequence::Keycap; } - if self.contains_category(EmojiSegmentationCategory::KeycapTerm) + if self.contains_category(EmojiSegmentationCategory::KeycapEnd) && self.contains_category(EmojiSegmentationCategory::Vs16) { return EmojiSequence::Keycap; @@ -273,10 +233,7 @@ impl EmojiDFA { return EmojiPresentationStyle::Emoji; } - if self.contains_category(EmojiSegmentationCategory::EmojiTextPresentation) { - return EmojiPresentationStyle::Text; - } - if self.contains_category(EmojiSegmentationCategory::EmojiEmojiPresentation) { + if self.contains_category(EmojiSegmentationCategory::EmojiPresentation) { return EmojiPresentationStyle::Emoji; } @@ -284,8 +241,13 @@ impl EmojiDFA { return EmojiPresentationStyle::Emoji; } - // single emoji character - if self.contains_category(EmojiSegmentationCategory::EmojiModifierBaseText) { + // single emoji modifier; e.g. 
🏻 + if self.contains_category(EmojiSegmentationCategory::EmojiModifier) { + return EmojiPresentationStyle::Emoji; + } + + // single emoji modifier base; e.g ☝ + if self.contains_category(EmojiSegmentationCategory::EmojiModifierBase) { return EmojiPresentationStyle::Text; } diff --git a/parley/src/emoji/types.rs b/parley/src/emoji/types.rs index 05eaa580f..9bdbc3de1 100644 --- a/parley/src/emoji/types.rs +++ b/parley/src/emoji/types.rs @@ -7,46 +7,31 @@ pub(crate) struct EmojiFlags(u32); impl EmojiFlags { const EMOJI_SHIFT: u32 = 0; - const EMOJI_MODIFIER_SHIFT: u32 = 1; - const EMOJI_MODIFIER_BASE_SHIFT: u32 = 2; - const EMOJI_PRESENTATION_SHIFT: u32 = 3; - const EMOJI_COMPONENT_SHIFT: u32 = 4; - const REGIONAL_INDICATOR_SHIFT: u32 = 5; + const EMOJI_PRESENTATION_SHIFT: u32 = 1; + const EMOJI_MODIFIER_SHIFT: u32 = 2; + const EMOJI_MODIFIER_BASE_SHIFT: u32 = 3; + const REGIONAL_INDICATOR_SHIFT: u32 = 4; const EMOJI_MASK: u32 = 1 << Self::EMOJI_SHIFT; + const EMOJI_PRESENTATION_MASK: u32 = 1 << Self::EMOJI_PRESENTATION_SHIFT; const EMOJI_MODIFIER_MASK: u32 = 1 << Self::EMOJI_MODIFIER_SHIFT; const EMOJI_MODIFIER_BASE_MASK: u32 = 1 << Self::EMOJI_MODIFIER_BASE_SHIFT; - const EMOJI_PRESENTATION_MASK: u32 = 1 << Self::EMOJI_PRESENTATION_SHIFT; - #[allow(unused)] - const EMOJI_COMPONENT_MASK: u32 = 1 << Self::EMOJI_COMPONENT_SHIFT; const REGIONAL_INDICATOR_MASK: u32 = 1 << Self::REGIONAL_INDICATOR_SHIFT; #[inline] - pub(crate) const fn new() -> Self { - Self(0) - } - - #[inline] - pub(crate) const fn with_emoji(mut self, is_emoji: bool) -> Self { - self.0 |= (is_emoji as u32) << Self::EMOJI_SHIFT; - self - } - - #[inline] - pub(crate) const fn with_extra( - mut self, + pub(crate) const fn new( + is_emoji: bool, + is_emoji_presentation: bool, is_emoji_modifier: bool, is_emoji_modifier_base: bool, - is_emoji_presentation: bool, - is_emoji_component: bool, is_regional_indicator: bool, ) -> Self { - self.0 |= (is_emoji_modifier as u32) << Self::EMOJI_MODIFIER_SHIFT; - self.0 
|= (is_emoji_modifier_base as u32) << Self::EMOJI_MODIFIER_BASE_SHIFT; - self.0 |= (is_emoji_presentation as u32) << Self::EMOJI_PRESENTATION_SHIFT; - self.0 |= (is_emoji_component as u32) << Self::EMOJI_COMPONENT_SHIFT; - self.0 |= (is_regional_indicator as u32) << Self::REGIONAL_INDICATOR_SHIFT; - self + let flags = (is_emoji as u32) << Self::EMOJI_SHIFT + | (is_emoji_presentation as u32) << Self::EMOJI_PRESENTATION_SHIFT + | (is_emoji_modifier as u32) << Self::EMOJI_MODIFIER_SHIFT + | (is_emoji_modifier_base as u32) << Self::EMOJI_MODIFIER_BASE_SHIFT + | (is_regional_indicator as u32) << Self::REGIONAL_INDICATOR_SHIFT; + Self(flags) } #[inline] @@ -54,6 +39,11 @@ impl EmojiFlags { self.0 & Self::EMOJI_MASK != 0 } + #[inline] + pub(crate) const fn is_emoji_presentation(self) -> bool { + self.0 & Self::EMOJI_PRESENTATION_MASK != 0 + } + #[inline] pub(crate) const fn is_emoji_modifier(self) -> bool { self.0 & Self::EMOJI_MODIFIER_MASK != 0 @@ -64,17 +54,6 @@ impl EmojiFlags { self.0 & Self::EMOJI_MODIFIER_BASE_MASK != 0 } - #[inline] - pub(crate) const fn is_emoji_presentation(self) -> bool { - self.0 & Self::EMOJI_PRESENTATION_MASK != 0 - } - - #[allow(unused)] - #[inline] - pub(crate) const fn is_emoji_component(self) -> bool { - self.0 & Self::EMOJI_COMPONENT_MASK != 0 - } - #[inline] pub(crate) const fn is_regional_indicator(self) -> bool { self.0 & Self::REGIONAL_INDICATOR_MASK != 0 @@ -89,10 +68,7 @@ pub(crate) enum EmojiState { Terminal, Emoji, - #[allow(unused)] - EmojiModifier, - EmojiModifierBaseText, - EmojiModifierBaseEmoji, + EmojiModifierBase, OptionalZwj, KeycapVs, TagBase, @@ -106,6 +82,25 @@ pub(crate) enum EmojiState { } impl EmojiState { + #[inline] + pub(crate) const fn from_u8(value: u8) -> Self { + match value { + 1 => Self::Start, + 2 => Self::Terminal, + 3 => Self::Emoji, + 4 => Self::EmojiModifierBase, + 5 => Self::OptionalZwj, + 6 => Self::KeycapVs, + 7 => Self::TagBase, + 8 => Self::Ri, + 9 => Self::TagSpec, + 10 => Self::TagEmpty, + 11 
=> Self::KeycapBase, + 12 => Self::Zwj, + _ => Self::Reject, + } + } + #[inline] pub(crate) const fn as_usize(self) -> usize { self as usize @@ -143,21 +138,19 @@ impl core::ops::IndexMut for [T] { #[derive(Clone, Copy, Debug, PartialEq)] pub(crate) enum EmojiSegmentationCategory { Emoji = 0, - EmojiTextPresentation, - EmojiEmojiPresentation, - EmojiModifierBaseText, - EmojiModifierBaseEmoji, + EmojiPresentation, EmojiModifier, + EmojiModifierBase, + KeycapBase, + KeycapEnd, + TagBase, + TagSpec, + TagEnd, /// `RegionalIndicator` Ri, - KeycapBase, - KeycapTerm, - Zwj, Vs15, Vs16, - TagBase, - TagSpec, - TagTerm, + Zwj, None, } @@ -171,36 +164,30 @@ impl EmojiSegmentationCategory { // '0'..'9', '#', '*' 0x30..=0x39 | 0x23 | 0x2A => Self::KeycapBase, 0x200D => Self::Zwj, - 0x20E3 => Self::KeycapTerm, + 0x20E3 => Self::KeycapEnd, 0xFE0E => Self::Vs15, 0xFE0F => Self::Vs16, 0x1F3F4 => Self::TagBase, 0xE0030..=0xE0039 | 0xE0061..=0xE007A => Self::TagSpec, - 0xE007F => Self::TagTerm, + 0xE007F => Self::TagEnd, _ => { + if flags.is_regional_indicator() { + return Self::Ri; + } + if flags.is_emoji_modifier_base() { - if flags.is_emoji_presentation() { - return Self::EmojiModifierBaseEmoji; - } - return Self::EmojiModifierBaseText; + return Self::EmojiModifierBase; } if flags.is_emoji_modifier() { return Self::EmojiModifier; } - if flags.is_regional_indicator() { - return Self::Ri; - } - if flags.is_emoji_presentation() { - return Self::EmojiEmojiPresentation; + return Self::EmojiPresentation; } if flags.is_emoji() { - if !flags.is_emoji_presentation() { - return Self::EmojiTextPresentation; - } return Self::Emoji; } diff --git a/parley/src/shape/mod.rs b/parley/src/shape/mod.rs index 565fcb01c..6e4b520f3 100644 --- a/parley/src/shape/mod.rs +++ b/parley/src/shape/mod.rs @@ -262,16 +262,15 @@ fn fill_cluster_in_place( if is_emoji { let emoji_modifier = analysis_data_sources.emoji_modifier(); let emoji_modifier_base = analysis_data_sources.emoji_modifier_base(); - let 
emoji_component = analysis_data_sources.emoji_component(); let emoji_presentation = analysis_data_sources.emoji_presentation(); let category = EmojiSegmentationCategory::from_codepoint( ch as u32, - EmojiFlags::new().with_emoji(is_emoji).with_extra( + EmojiFlags::new( + is_emoji, + emoji_presentation.contains(ch), emoji_modifier.contains(ch), emoji_modifier_base.contains(ch), - emoji_presentation.contains(ch), - emoji_component.contains(ch), info.is_region_indicator(), ), ); diff --git a/parley/src/tests/test_emoji_segmenters.rs b/parley/src/tests/test_emoji_segmenters.rs index 5f240ce4d..14ea0635d 100644 --- a/parley/src/tests/test_emoji_segmenters.rs +++ b/parley/src/tests/test_emoji_segmenters.rs @@ -21,10 +21,9 @@ struct TestEntity<'a> { fn assert_emoji_segmenters_produce_same_result(entity: TestEntity<'_>) { let analysis = AnalysisDataSources::new(); + let emoji_presentation = analysis.emoji_presentation(); let emoji_modifier = analysis.emoji_modifier(); let emoji_modifier_base = analysis.emoji_modifier_base(); - let emoji_component = analysis.emoji_component(); - let emoji_presentation = analysis.emoji_presentation(); let mut emoji_dfa = EmojiDFA::new(); @@ -36,17 +35,16 @@ fn assert_emoji_segmenters_produce_same_result(entity: TestEntity<'_>) { let props = analysis.properties(char::from_u32(cp).unwrap()); let is_emoji = props.is_emoji_or_pictograph(); + let is_emoji_presentation = emoji_presentation.contains32(cp); let is_emoji_modifier = emoji_modifier.contains32(cp); let is_emoji_modifier_base = emoji_modifier_base.contains32(cp); - let is_emoji_presentation = emoji_presentation.contains32(cp); - let is_emoji_component = emoji_component.contains32(cp); let is_regional_indicator = props.is_region_indicator(); - let emoji_flags = EmojiFlags::new().with_emoji(is_emoji).with_extra( + let emoji_flags = EmojiFlags::new( + is_emoji, + is_emoji_presentation, is_emoji_modifier, is_emoji_modifier_base, - is_emoji_presentation, - is_emoji_component, 
is_regional_indicator, ); @@ -69,7 +67,7 @@ fn emoji_presentation_default() { sequence: &[ 0x1F600, // GRINNING FACE ], - categories: &[EmojiSegmentationCategory::EmojiEmojiPresentation], + categories: &[EmojiSegmentationCategory::EmojiPresentation], style: EmojiPresentationStyle::Emoji, }); } @@ -81,8 +79,8 @@ fn text_presentation_default() { sequence: &[ 0x00A9, // COPYRIGHT SIGN ], - categories: &[EmojiSegmentationCategory::EmojiTextPresentation], - style: EmojiPresentationStyle::Text, + categories: &[EmojiSegmentationCategory::Emoji], + style: EmojiPresentationStyle::Default, }); } @@ -138,7 +136,7 @@ fn unqualified_keycap() { ], categories: &[ EmojiSegmentationCategory::KeycapBase, - EmojiSegmentationCategory::KeycapTerm, + EmojiSegmentationCategory::KeycapEnd, ], style: EmojiPresentationStyle::Default, }); @@ -156,7 +154,7 @@ fn keycap_vs15_term() { categories: &[ EmojiSegmentationCategory::KeycapBase, EmojiSegmentationCategory::Vs15, - EmojiSegmentationCategory::KeycapTerm, + EmojiSegmentationCategory::KeycapEnd, ], style: EmojiPresentationStyle::Text, }); @@ -174,7 +172,7 @@ fn qualified_keycap() { categories: &[ EmojiSegmentationCategory::KeycapBase, EmojiSegmentationCategory::Vs16, - EmojiSegmentationCategory::KeycapTerm, + EmojiSegmentationCategory::KeycapEnd, ], style: EmojiPresentationStyle::Emoji, }); @@ -188,7 +186,7 @@ fn lone_emoji_modifier() { 0x1F3FB, // EMOJI MODIFIER FITZPATRICK TYPE-1-2 ], categories: &[EmojiSegmentationCategory::EmojiModifier], - style: EmojiPresentationStyle::Default, + style: EmojiPresentationStyle::Emoji, }); } @@ -199,7 +197,7 @@ fn bare_modifier_base_text_default() { sequence: &[ 0x261D, // WHITE UP POINTING INDEX ], - categories: &[EmojiSegmentationCategory::EmojiModifierBaseText], + categories: &[EmojiSegmentationCategory::EmojiModifierBase], style: EmojiPresentationStyle::Text, }); } @@ -213,7 +211,7 @@ fn modifier_base_text_default_vs16() { 0xFE0F, // VARIATION SELECTOR-16 ], categories: &[ - 
EmojiSegmentationCategory::EmojiModifierBaseText, + EmojiSegmentationCategory::EmojiModifierBase, EmojiSegmentationCategory::Vs16, ], style: EmojiPresentationStyle::Emoji, @@ -229,7 +227,7 @@ fn modifier_base_text_default_skin_tone() { 0x1F3FB, // EMOJI MODIFIER FITZPATRICK TYPE-1-2 ], categories: &[ - EmojiSegmentationCategory::EmojiModifierBaseText, + EmojiSegmentationCategory::EmojiModifierBase, EmojiSegmentationCategory::EmojiModifier, ], style: EmojiPresentationStyle::Emoji, @@ -245,7 +243,7 @@ fn modifier_base_emoji_default_skin_tone() { 0x1F3FB, // EMOJI MODIFIER FITZPATRICK TYPE-1-2 ], categories: &[ - EmojiSegmentationCategory::EmojiModifierBaseEmoji, + EmojiSegmentationCategory::EmojiModifierBase, EmojiSegmentationCategory::EmojiModifier, ], style: EmojiPresentationStyle::Emoji, @@ -309,7 +307,7 @@ fn text_default_emoji_vs15() { 0xFE0E, // VARIATION SELECTOR-15 ], categories: &[ - EmojiSegmentationCategory::EmojiTextPresentation, + EmojiSegmentationCategory::Emoji, EmojiSegmentationCategory::Vs15, ], style: EmojiPresentationStyle::Text, @@ -325,7 +323,7 @@ fn text_default_emoji_vs16() { 0xFE0F, // VARIATION SELECTOR-16 ], categories: &[ - EmojiSegmentationCategory::EmojiTextPresentation, + EmojiSegmentationCategory::Emoji, EmojiSegmentationCategory::Vs16, ], style: EmojiPresentationStyle::Emoji, @@ -341,7 +339,7 @@ fn emoji_default_emoji_vs15() { 0xFE0E, // VARIATION SELECTOR-15 ], categories: &[ - EmojiSegmentationCategory::EmojiEmojiPresentation, + EmojiSegmentationCategory::EmojiPresentation, EmojiSegmentationCategory::Vs15, ], style: EmojiPresentationStyle::Text, @@ -357,7 +355,7 @@ fn emoji_default_emoji_vs16() { 0xFE0F, // VARIATION SELECTOR-16 ], categories: &[ - EmojiSegmentationCategory::EmojiEmojiPresentation, + EmojiSegmentationCategory::EmojiPresentation, EmojiSegmentationCategory::Vs16, ], style: EmojiPresentationStyle::Emoji, @@ -376,11 +374,11 @@ fn zwj_family() { 0x1F467, // GIRL ], categories: &[ - 
EmojiSegmentationCategory::EmojiModifierBaseEmoji, + EmojiSegmentationCategory::EmojiModifierBase, EmojiSegmentationCategory::Zwj, - EmojiSegmentationCategory::EmojiModifierBaseEmoji, + EmojiSegmentationCategory::EmojiModifierBase, EmojiSegmentationCategory::Zwj, - EmojiSegmentationCategory::EmojiModifierBaseEmoji, + EmojiSegmentationCategory::EmojiModifierBase, ], style: EmojiPresentationStyle::Emoji, }); @@ -400,13 +398,13 @@ fn long_zwj_family() { 0x1F466, // BOY ], categories: &[ - EmojiSegmentationCategory::EmojiModifierBaseEmoji, + EmojiSegmentationCategory::EmojiModifierBase, EmojiSegmentationCategory::Zwj, - EmojiSegmentationCategory::EmojiModifierBaseEmoji, + EmojiSegmentationCategory::EmojiModifierBase, EmojiSegmentationCategory::Zwj, - EmojiSegmentationCategory::EmojiModifierBaseEmoji, + EmojiSegmentationCategory::EmojiModifierBase, EmojiSegmentationCategory::Zwj, - EmojiSegmentationCategory::EmojiModifierBaseEmoji, + EmojiSegmentationCategory::EmojiModifierBase, ], style: EmojiPresentationStyle::Emoji, }); @@ -424,11 +422,11 @@ fn zwj_couple() { 0x1F468, // MAN ], categories: &[ - EmojiSegmentationCategory::EmojiModifierBaseEmoji, + EmojiSegmentationCategory::EmojiModifierBase, EmojiSegmentationCategory::Zwj, - EmojiSegmentationCategory::EmojiTextPresentation, + EmojiSegmentationCategory::Emoji, EmojiSegmentationCategory::Zwj, - EmojiSegmentationCategory::EmojiModifierBaseEmoji, + EmojiSegmentationCategory::EmojiModifierBase, ], style: EmojiPresentationStyle::Emoji, }); @@ -445,10 +443,10 @@ fn zwj_with_vs16_element() { 0x1F469, // WOMAN ], categories: &[ - EmojiSegmentationCategory::EmojiModifierBaseEmoji, + EmojiSegmentationCategory::EmojiModifierBase, EmojiSegmentationCategory::Vs16, EmojiSegmentationCategory::Zwj, - EmojiSegmentationCategory::EmojiModifierBaseEmoji, + EmojiSegmentationCategory::EmojiModifierBase, ], style: EmojiPresentationStyle::Emoji, }); @@ -466,10 +464,10 @@ fn zwj_with_vs16_on_both_elements() { 0xFE0F, // VARIATION SELECTOR-16 
], categories: &[ - EmojiSegmentationCategory::EmojiModifierBaseEmoji, + EmojiSegmentationCategory::EmojiModifierBase, EmojiSegmentationCategory::Vs16, EmojiSegmentationCategory::Zwj, - EmojiSegmentationCategory::EmojiModifierBaseEmoji, + EmojiSegmentationCategory::EmojiModifierBase, EmojiSegmentationCategory::Vs16, ], style: EmojiPresentationStyle::Emoji, @@ -487,10 +485,10 @@ fn zwj_after_modifier_sequence() { 0x1F4BB, // PERSONAL COMPUTER ], categories: &[ - EmojiSegmentationCategory::EmojiModifierBaseEmoji, + EmojiSegmentationCategory::EmojiModifierBase, EmojiSegmentationCategory::EmojiModifier, EmojiSegmentationCategory::Zwj, - EmojiSegmentationCategory::EmojiEmojiPresentation, + EmojiSegmentationCategory::EmojiPresentation, ], style: EmojiPresentationStyle::Emoji, }); @@ -507,10 +505,10 @@ fn zwj_technologist_with_skin_tone() { 0x1F4BB, // PERSONAL COMPUTER ], categories: &[ - EmojiSegmentationCategory::EmojiModifierBaseEmoji, + EmojiSegmentationCategory::EmojiModifierBase, EmojiSegmentationCategory::EmojiModifier, EmojiSegmentationCategory::Zwj, - EmojiSegmentationCategory::EmojiEmojiPresentation, + EmojiSegmentationCategory::EmojiPresentation, ], style: EmojiPresentationStyle::Emoji, }); @@ -527,10 +525,10 @@ fn vs16_enables_zwj_continuation() { 0x1F469, // WOMAN ], categories: &[ - EmojiSegmentationCategory::EmojiTextPresentation, + EmojiSegmentationCategory::Emoji, EmojiSegmentationCategory::Vs16, EmojiSegmentationCategory::Zwj, - EmojiSegmentationCategory::EmojiModifierBaseEmoji, + EmojiSegmentationCategory::EmojiModifierBase, ], style: EmojiPresentationStyle::Emoji, }); @@ -556,7 +554,7 @@ fn tag_sequence_england() { EmojiSegmentationCategory::TagSpec, EmojiSegmentationCategory::TagSpec, EmojiSegmentationCategory::TagSpec, - EmojiSegmentationCategory::TagTerm, + EmojiSegmentationCategory::TagEnd, ], style: EmojiPresentationStyle::Emoji, }); @@ -574,7 +572,7 @@ fn tag_base_as_zwj_element() { categories: &[ EmojiSegmentationCategory::TagBase, 
EmojiSegmentationCategory::Zwj, - EmojiSegmentationCategory::EmojiEmojiPresentation, + EmojiSegmentationCategory::EmojiPresentation, ], style: EmojiPresentationStyle::Emoji, }); @@ -594,7 +592,7 @@ fn tag_base_vs16_as_zwj() { EmojiSegmentationCategory::TagBase, EmojiSegmentationCategory::Vs16, EmojiSegmentationCategory::Zwj, - EmojiSegmentationCategory::EmojiEmojiPresentation, + EmojiSegmentationCategory::EmojiPresentation, ], style: EmojiPresentationStyle::Emoji, }); From ff0f90ca0cee0323af3b8aa8ef1e875b6dcd893f Mon Sep 17 00:00:00 2001 From: Fangdun Tsai Date: Fri, 15 May 2026 16:31:01 +0800 Subject: [PATCH 34/34] fix size --- parley/src/emoji/dfa.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/parley/src/emoji/dfa.rs b/parley/src/emoji/dfa.rs index fe8be1846..1275eef6d 100644 --- a/parley/src/emoji/dfa.rs +++ b/parley/src/emoji/dfa.rs @@ -6,13 +6,13 @@ use super::types::{EmojiPresentationStyle, EmojiSegmentationCategory, EmojiSeque /// The transition table for Emoji DFA. /// /// -static DFA_TRANS: [[u8; 13]; 14] = { +static DFA_TRANS: [[u8; 13]; 13] = { use EmojiSegmentationCategory as Category; use EmojiState as State; - let mut t = [[0; 13]; 14]; + let mut t = [[0; 13]; 13]; - /// Add a state transition to the DFA transition table. + /// Adds a state transition to the DFA transition table. macro_rules! add { ($state:expr, $category:expr, $next_state:expr) => { t[$state.as_usize()][$category.as_usize()] = $next_state.as_u8()