Skip to content

Commit 028e39d

Browse files
jparachoniakcopybara-github
authored andcommitted
Adding code to fix proto 3 unrecognized enum serialization/deserialization
Designed to take no extra memory compared to not tracking the unrecognized values and minimal extra execution time. PiperOrigin-RevId: 899198543
1 parent 878867c commit 028e39d

File tree

4 files changed

+167
-4
lines changed

4 files changed

+167
-4
lines changed

protobuf/runtime/src/com/google/protobuf/Descriptors_PackagePrivate.h

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -264,7 +264,43 @@ CGP_ALWAYS_INLINE BOOL CGPJavaTypeIsEnum(CGPFieldJavaType type) {
264264
return type == ComGoogleProtobufDescriptors_FieldDescriptor_JavaType_Enum_ENUM;
265265
}
266266

267+
// Gets the integer value of an enum.
268+
//
269+
// This function is updated to support preservation of unknown enum values in proto3.
270+
//
271+
// How it works:
272+
// - Valid enums are pointers to static singleton objects. Since objects are aligned
273+
// to at least 8 bytes on 64-bit systems, the lowest bit of a valid pointer is always 0.
274+
// - Unrecognized enum values are stored by shifting the raw 32-bit value into the
275+
// upper 32 bits and setting the lowest bit to 1 (tagging it).
276+
// Representation: (raw_value << 32) | 0x1
277+
//
278+
// Why this is safe:
279+
// 1. ARC (Automatic Reference Counting): The J2ObjC runtime does not retain/release
280+
// enum fields (see CGPIsRetainedType in Descriptors.m returning NO for enums).
281+
// Therefore, storing a non-pointer value in the id field will not cause ARC to
282+
// attempt to dereference it and crash.
283+
// 2. PAC (Pointer Authentication Codes): PAC uses the upper bits of pointers on
284+
// ARM64e to store a signature. We do not touch the upper bits of VALID pointers.
285+
// We only use the upper bits when the value is NOT a pointer (indicated by the
286+
// lowest bit being 1). Since we don't try to authenticate this tagged value as a
287+
// pointer, PAC is not triggered.
288+
//
289+
// Why this is correct:
290+
// - In proto3, the 0 value is always defined (required by spec). Thus, an unknown
291+
// value can never be 0. The shifted raw value in the upper 32 bits will therefore
292+
// always be non-zero for unknown values, ensuring we can distinguish it from a
293+
// valid pointer where the upper 32 bits are naturally zero.
267294
CGP_ALWAYS_INLINE jint CGPEnumGetIntValue(CGPEnumDescriptor *descriptor, id enumObj) {
295+
uintptr_t stored_val = (uintptr_t)(ARCBRIDGE void *)enumObj;
296+
297+
// Check the lowest bit. If it is 1, it is an unrecognized tagged value.
298+
if (stored_val & 0x1) {
299+
// Return the raw value stored in the upper 32 bits.
300+
return (jint)(stored_val >> 32);
301+
}
302+
303+
// Otherwise, it is a valid pointer to a static enum singleton.
268304
return *(jint *)((char *)(ARCBRIDGE void *)enumObj + descriptor->valueOffset_);
269305
}
270306

protobuf/runtime/src/com/google/protobuf/GeneratedMessage.mm

Lines changed: 91 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -374,6 +374,44 @@ static void SingularSetRetainable(id msg, TYPE_Retainable value, size_t offset,
374374

375375
#undef REPEATED_GETTER_IMP
376376

377+
// Getter for singular enum fields. Intercepts tagged pointers for unknown values
378+
// and returns the UNRECOGNIZED singleton instead.
379+
static IMP GetSingularGetterImpEnum(size_t offset, CGPHasLocator hasLoc, id defaultValue,
380+
CGPFieldDescriptor *field) {
381+
CGPEnumDescriptor *enumType = [field getEnumType];
382+
id unrecognizedSingleton =
383+
((CGPEnumValueDescriptor *)enumType->values_->buffer_[enumType->values_->size_ - 1])->enum_;
384+
385+
return imp_implementationWithBlock(^id(id msg) {
386+
if (GetHas(msg, hasLoc)) {
387+
id value = *FIELD_PTR(id, msg, offset);
388+
uintptr_t stored_val = (uintptr_t)(__bridge void *)value;
389+
if (stored_val & 0x1) {
390+
return unrecognizedSingleton;
391+
}
392+
return value;
393+
}
394+
return defaultValue;
395+
});
396+
}
397+
398+
// Getter for repeated enum fields. Intercepts tagged pointers for unknown values
399+
// and returns the UNRECOGNIZED singleton instead.
400+
static IMP GetRepeatedGetterImpEnum(size_t offset, CGPFieldDescriptor *field) {
401+
CGPEnumDescriptor *enumType = [field getEnumType];
402+
id unrecognizedSingleton =
403+
((CGPEnumValueDescriptor *)enumType->values_->buffer_[enumType->values_->size_ - 1])->enum_;
404+
405+
return imp_implementationWithBlock(^id(id msg, jint idx) {
406+
id value = CGPRepeatedFieldGetId(REPEATED_FIELD_PTR(msg, offset), idx);
407+
uintptr_t stored_val = (uintptr_t)(__bridge void *)value;
408+
if (stored_val & 0x1) {
409+
return unrecognizedSingleton;
410+
}
411+
return value;
412+
});
413+
}
414+
377415
static BOOL AddGetterMethod(Class cls, SEL sel, CGPFieldDescriptor *field) {
378416
BOOL repeated = CGPFieldIsRepeated(field);
379417
IMP imp = NULL;
@@ -388,7 +426,30 @@ static BOOL AddGetterMethod(Class cls, SEL sel, CGPFieldDescriptor *field) {
388426
strcpy(encoding, @encode(TYPE_##NAME)); \
389427
break;
390428

391-
SWITCH_TYPES_NO_ENUM(CGPFieldGetJavaType(field), ADD_GETTER_METHOD_CASE)
429+
// We expand the switch manually instead of using SWITCH_TYPES_NO_ENUM
430+
// to handle ENUM fields specially and avoid RETAIN_AND_AUTORELEASE on tagged pointers.
431+
switch (CGPFieldGetJavaType(field)) {
432+
case ComGoogleProtobufDescriptors_FieldDescriptor_JavaType_Enum_INT:
433+
ADD_GETTER_METHOD_CASE(Int)
434+
case ComGoogleProtobufDescriptors_FieldDescriptor_JavaType_Enum_LONG:
435+
ADD_GETTER_METHOD_CASE(Long)
436+
case ComGoogleProtobufDescriptors_FieldDescriptor_JavaType_Enum_FLOAT:
437+
ADD_GETTER_METHOD_CASE(Float)
438+
case ComGoogleProtobufDescriptors_FieldDescriptor_JavaType_Enum_DOUBLE:
439+
ADD_GETTER_METHOD_CASE(Double)
440+
case ComGoogleProtobufDescriptors_FieldDescriptor_JavaType_Enum_BOOLEAN:
441+
ADD_GETTER_METHOD_CASE(Bool)
442+
case ComGoogleProtobufDescriptors_FieldDescriptor_JavaType_Enum_STRING:
443+
case ComGoogleProtobufDescriptors_FieldDescriptor_JavaType_Enum_BYTE_STRING:
444+
case ComGoogleProtobufDescriptors_FieldDescriptor_JavaType_Enum_MESSAGE:
445+
ADD_GETTER_METHOD_CASE(Id)
446+
case ComGoogleProtobufDescriptors_FieldDescriptor_JavaType_Enum_ENUM:
447+
imp = repeated ? GetRepeatedGetterImpEnum(offset, field)
448+
: GetSingularGetterImpEnum(offset, hasLoc, field->data_->defaultValue.valueId,
449+
field);
450+
strcpy(encoding, @encode(id));
451+
break;
452+
}
392453

393454
#undef ADD_GETTER_METHOD_CASE
394455

@@ -1579,11 +1640,37 @@ static inline BOOL ReadEnumValueDescriptor(CGPCodedInputStream *input, CGPEnumDe
15791640
return YES;
15801641
}
15811642

1643+
// Reads an enum value from the stream and resolves it to a Java enum instance.
1644+
//
1645+
// This function is preserves unknown enum values in proto3 (open enums)
1646+
// by storing them as tagged integers instead of falling back to the UNRECOGNIZED
1647+
// singleton.
1648+
//
1649+
// See CGPEnumGetIntValue in Descriptors_PackagePrivate.h for details on the
1650+
// tagged representation and why it is safe with ARC and PAC.
15821651
static BOOL ReadEnumJavaValue(CGPCodedInputStream *input, CGPEnumDescriptor *enumType,
15831652
id *javaValue) {
1584-
CGPEnumValueDescriptor *valueDescriptor;
1585-
if (!ReadEnumValueDescriptor(input, enumType, &valueDescriptor)) return NO;
1586-
*javaValue = valueDescriptor == nil ? nil : valueDescriptor->enum_;
1653+
jint value;
1654+
if (!CGPReadEnum(input, &value)) return NO;
1655+
1656+
CGPEnumValueDescriptor *valueDescriptor = CGPEnumValueDescriptorFromInt(enumType, value);
1657+
1658+
if (valueDescriptor == nil) {
1659+
// Closed enum (proto2) and value was not found. We store nil.
1660+
*javaValue = nil;
1661+
} else if (!enumType->is_closed_ &&
1662+
valueDescriptor == enumType->values_->buffer_[enumType->values_->size_ - 1]) {
1663+
// Open enum (proto3) and the value was not found, so CGPEnumValueDescriptorFromInt
1664+
// returned the UNRECOGNIZED descriptor (which is always the last element in values_).
1665+
//
1666+
// We store the raw value in the upper 32 bits and set the lowest bit to 1.
1667+
// This preserves the value for serialization while remaining safe from ARC.
1668+
*javaValue = (id)(ARCBRIDGE void *)(((uintptr_t)value << 32) | 0x1);
1669+
} else {
1670+
// Found a valid known descriptor. Store the singleton pointer.
1671+
*javaValue = valueDescriptor->enum_;
1672+
}
1673+
15871674
return YES;
15881675
}
15891676

protobuf/tests/Proto3EnumTest.java

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,4 +101,42 @@ public void testNegativeEnumNumber() throws Exception {
101101
Text text = Text.parseFrom(new byte[] {0x08, 0x7f}, ExtensionRegistry.getEmptyRegistry());
102102
assertThat(text.getGreeting()).isSameInstanceAs(Greetings.UNRECOGNIZED);
103103
}
104+
105+
public void testSingularParseUnknownEnumSerialization() throws Exception {
106+
// field 1 (fruit), value 5 (unrecognized)
107+
// Tag: (1 << 3) | 0 = 8
108+
// Value: 5
109+
byte[] bytes = new byte[] {0x08, 0x05};
110+
FruitBox box = FruitBox.parseFrom(bytes, ExtensionRegistry.getEmptyRegistry());
111+
112+
byte[] outputBytes = box.toByteArray();
113+
assertThat(outputBytes).isEqualTo(bytes);
114+
}
115+
116+
public void testRepeatedParseUnknownEnumSerialization() throws Exception {
117+
// field 2 (fruits), repeated, packed
118+
// Tag: (2 << 3) | 2 = 18
119+
// Length: 3
120+
// Values: 1 (APPLE), 2 (BANANA), 5 (unrecognized)
121+
byte[] bytes = new byte[] {0x12, 0x03, 0x01, 0x02, 0x05};
122+
FruitBox box = FruitBox.parseFrom(bytes, ExtensionRegistry.getEmptyRegistry());
123+
124+
byte[] outputBytes = box.toByteArray();
125+
assertThat(outputBytes).isEqualTo(bytes);
126+
}
127+
128+
public void testMapParseUnknownEnumSerialization() throws Exception {
129+
// field 3 (fruit_map), map
130+
// Tag: (3 << 3) | 2 = 26
131+
// Length: 4
132+
// Map Entry:
133+
// key: field 1, value 1 -> 0x08 0x01
134+
// value: field 2, value 5 -> 0x10 0x05
135+
byte[] bytes = new byte[] {0x1a, 0x04, 0x08, 0x01, 0x10, 0x05};
136+
FruitBox box = FruitBox.parseFrom(bytes, ExtensionRegistry.getEmptyRegistry());
137+
138+
byte[] outputBytes = box.toByteArray();
139+
assertThat(outputBytes).isEqualTo(bytes);
140+
}
141+
104142
}

protobuf/tests/protos/proto3_enum.proto

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ enum Fruit {
2727

2828
message FruitBox {
2929
Fruit fruit = 1;
30+
repeated Fruit fruits = 2;
31+
map<int32, Fruit> fruit_map = 3;
3032
}
3133

3234
enum Greetings {

0 commit comments

Comments
 (0)