From issues-return-166869-archive-asf-public=cust-asf.ponee.io@flink.apache.org Tue May 15 20:15:40 2018 Return-Path: X-Original-To: archive-asf-public@cust-asf.ponee.io Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by mx-eu-01.ponee.io (Postfix) with SMTP id B05E4180634 for ; Tue, 15 May 2018 20:15:39 +0200 (CEST) Received: (qmail 46641 invoked by uid 500); 15 May 2018 18:15:38 -0000 Mailing-List: contact issues-help@flink.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@flink.apache.org Delivered-To: mailing list issues@flink.apache.org Received: (qmail 46632 invoked by uid 99); 15 May 2018 18:15:38 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 15 May 2018 18:15:38 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id A6D19F4DCE; Tue, 15 May 2018 18:15:38 +0000 (UTC) From: dawidwys To: issues@flink.apache.org Reply-To: issues@flink.apache.org References: In-Reply-To: Subject: [GitHub] flink pull request #5995: [FLINK-9337] Implemented AvroDeserializationSchema Content-Type: text/plain Message-Id: <20180515181538.A6D19F4DCE@git1-us-west.apache.org> Date: Tue, 15 May 2018 18:15:38 +0000 (UTC) Github user dawidwys commented on a diff in the pull request: https://github.com/apache/flink/pull/5995#discussion_r188386286 --- Diff: flink-formats/flink-avro/src/main/java/org/apache/flink/formats/avro/AvroDeserializationSchema.java --- @@ -0,0 +1,215 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.formats.avro; + +import org.apache.flink.api.common.serialization.DeserializationSchema; +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.formats.avro.typeutils.AvroTypeInfo; +import org.apache.flink.formats.avro.typeutils.GenericRecordAvroTypeInfo; +import org.apache.flink.formats.avro.utils.MutableByteArrayInputStream; +import org.apache.flink.util.Preconditions; + +import org.apache.avro.Schema; +import org.apache.avro.generic.GenericDatumReader; +import org.apache.avro.generic.GenericRecord; +import org.apache.avro.io.Decoder; +import org.apache.avro.io.DecoderFactory; +import org.apache.avro.reflect.ReflectData; +import org.apache.avro.reflect.ReflectDatumReader; +import org.apache.avro.specific.SpecificData; +import org.apache.avro.specific.SpecificDatumReader; +import org.apache.avro.specific.SpecificRecord; + +import javax.annotation.Nullable; + +import java.io.IOException; +import java.io.ObjectInputStream; + +/** + * Deserialization schema that deserializes from Avro binary format. + * + * @param <T> type of record it produces + */ +public class AvroDeserializationSchema<T> implements DeserializationSchema<T> { + + /** + * Class to deserialize to. + */ + private Class<T> recordClazz; + + private String schemaString = null; + + /** + * Reader that deserializes byte array into a record. + */ + private transient GenericDatumReader<T> datumReader; + + /** + * Input stream to read message from. + */ + private transient MutableByteArrayInputStream inputStream; + + /** + * Avro decoder that decodes binary data. + */ + private transient Decoder decoder; + + /** + * Avro schema for the reader. + */ + private transient Schema reader; + + /** + * Creates an Avro deserialization schema. + * + * @param recordClazz class to deserialize to. Should be one of: + * {@link org.apache.avro.specific.SpecificRecord}, + * {@link org.apache.avro.generic.GenericRecord}. + * @param reader reader's Avro schema. 
Should be provided if recordClazz is + * {@link GenericRecord} + */ + AvroDeserializationSchema(Class<T> recordClazz, @Nullable Schema reader) { + Preconditions.checkNotNull(recordClazz, "Avro record class must not be null."); + this.recordClazz = recordClazz; + this.inputStream = new MutableByteArrayInputStream(); + this.decoder = DecoderFactory.get().binaryDecoder(inputStream, null); + this.reader = reader; + if (reader != null) { + this.schemaString = reader.toString(); + } + } + + /** + * Creates {@link AvroDeserializationSchema} that produces {@link GenericRecord} using provided schema. + * + * @param schema schema of produced records + * @return deserialization schema that produces records in form of {@link GenericRecord} + */ + public static AvroDeserializationSchema<GenericRecord> forGeneric(Schema schema) { + return new AvroDeserializationSchema<>(GenericRecord.class, schema); + } + + /** + * Creates {@link AvroDeserializationSchema} that produces classes that were generated from avro schema. + * + * @param tClass class of record to be produced + * @return deserialization schema that produces records of the given class + */ + public static <T extends SpecificRecord> AvroDeserializationSchema<T> forSpecific(Class<T> tClass) { + return new AvroDeserializationSchema<>(tClass, null); + } + + GenericDatumReader<T> getDatumReader() { + if (datumReader != null) { + return datumReader; + } + + if (SpecificRecord.class.isAssignableFrom(recordClazz)) { + this.datumReader = new SpecificDatumReader<>(); + } else if (GenericRecord.class.isAssignableFrom(recordClazz)) { + this.datumReader = new GenericDatumReader<>(); + } else { + this.datumReader = new ReflectDatumReader<>(); + } + + return datumReader; + } + + Schema getReaderSchema() { + if (reader != null) { + return reader; + } + + if (SpecificRecord.class.isAssignableFrom(recordClazz)) { + this.reader = SpecificData.get().getSchema(recordClazz); --- End diff -- Didn't think about it, sorry. ---