@@ -41,7 +41,13 @@ object ParquetExample {
 * These case classes represent both full and projected field mappings from the [[Account]] Avro
 * record.
 */
44- case class AccountFull (id : Int , `type` : String , name : Option [String ], amount : Double )
44+ case class AccountFull (
45+ id : Int ,
46+ `type` : String ,
47+ name : Option [String ],
48+ amount : Double ,
49+ accountStatus : Option [AccountStatus ]
50+ )
4551 case class AccountProjection (id : Int , name : Option [String ])
4652
4753 /**
@@ -108,21 +114,19 @@ object ParquetExample {
108114
109115 private def avroSpecificIn (sc : ScioContext , args : Args ): ClosedTap [String ] = {
110116 // Macros for generating column projections and row predicates
111- val projection = Projection [Account ](_.getId, _.getName, _.getAmount)
117+ // account_status is the only field with default value that can be left out the projection
118+ val projection = Projection [Account ](_.getId, _.getType, _.getName, _.getAmount)
112119 val predicate = Predicate [Account ](x => x.getAmount > 0 )
113120
114121 sc.parquetAvroFile[Account ](args(" input" ), projection, predicate)
115- // The result Account records are not complete Avro objects. Only the projected columns are present while the rest are null.
116- // These objects may fail serialization and it’s recommended that you map them out to tuples or case classes right after reading.
117- .map(x => AccountProjection (x.getId, Some (x.getName.toString)))
118122 .saveAsTextFile(args(" output" ))
119123 }
120124
121125 private def avroGenericIn (sc : ScioContext , args : Args ): ClosedTap [String ] = {
122126 val schema = Account .getClassSchema
123127 implicit val genericRecordCoder : Coder [GenericRecord ] = avroGenericRecordCoder(schema)
124128
125- val parquetIn = sc.parquetAvroFile[ GenericRecord ] (args(" input" ), schema)
129+ val parquetIn = sc.parquetAvroGenericRecordFile (args(" input" ), schema)
126130
127131 // Catches a specific bug with encoding GenericRecords read by parquet-avro
128132 parquetIn
@@ -146,12 +150,19 @@ object ParquetExample {
146150 // but close to `parquet.block.size`, i.e. 1 GiB. This guarantees that each file contains 1 row group only and reduces seeks.
147151 .saveAsParquetAvroFile(args(" output" ), numShards = 1 , conf = fineTunedParquetWriterConfig)
148152
153+ private [extra] def toScalaFull (account : Account ): AccountFull =
154+ AccountFull (
155+ account.getId,
156+ account.getType.toString,
157+ Some (account.getName.toString),
158+ account.getAmount,
159+ Some (account.getAccountStatus)
160+ )
161+
149162 private def typedOut (sc : ScioContext , args : Args ): ClosedTap [AccountFull ] =
150163 sc.parallelize(fakeData)
151- .map(x => AccountFull (x.getId, x.getType.toString, Some (x.getName.toString), x.getAmount))
152- .saveAsTypedParquetFile(
153- args(" output" )
154- )
164+ .map(toScalaFull)
165+ .saveAsTypedParquetFile(args(" output" ))
155166
156167 private [extra] def toExample (account : Account ): Example = {
157168 val amount = Feature
0 commit comments