1818 */
1919package org .apache .parquet .avro ;
2020
21+ import java .lang .reflect .Constructor ;
22+ import java .lang .reflect .InvocationTargetException ;
2123import java .nio .ByteBuffer ;
2224import org .apache .avro .Schema ;
2325import org .apache .avro .generic .GenericData ;
26+ import org .apache .avro .util .Utf8 ;
2427import org .apache .parquet .column .Dictionary ;
28+ import org .apache .parquet .io .ParquetDecodingException ;
2529import org .apache .parquet .io .api .Binary ;
2630import org .apache .parquet .io .api .GroupConverter ;
2731import org .apache .parquet .io .api .PrimitiveConverter ;
@@ -44,6 +48,40 @@ public AvroPrimitiveConverter(ParentValueContainer parent) {
4448 }
4549 }
4650
51+ abstract static class BinaryConverter <T > extends AvroPrimitiveConverter {
52+ private T [] dict = null ;
53+
54+ public BinaryConverter (ParentValueContainer parent ) {
55+ super (parent );
56+ }
57+
58+ public abstract T convert (Binary binary );
59+
60+ @ Override
61+ public void addBinary (Binary value ) {
62+ parent .add (convert (value ));
63+ }
64+
65+ @ Override
66+ public boolean hasDictionarySupport () {
67+ return true ;
68+ }
69+
70+ @ Override
71+ @ SuppressWarnings ("unchecked" )
72+ public void setDictionary (Dictionary dictionary ) {
73+ dict = (T []) new Object [dictionary .getMaxId () + 1 ];
74+ for (int i = 0 ; i <= dictionary .getMaxId (); i ++) {
75+ dict [i ] = convert (dictionary .decodeToBinary (i ));
76+ }
77+ }
78+
79+ @ Override
80+ public void addValueFromDictionary (int dictionaryId ) {
81+ parent .add (dict [dictionaryId ]);
82+ }
83+ }
84+
4785 static final class FieldByteConverter extends AvroPrimitiveConverter {
4886 public FieldByteConverter (ParentValueContainer parent ) {
4987 super (parent );
@@ -54,6 +92,7 @@ public void addInt(int value) {
5492 parent .addByte ((byte ) value );
5593 }
5694 }
95+
5796 static final class FieldShortConverter extends AvroPrimitiveConverter {
5897 public FieldShortConverter (ParentValueContainer parent ) {
5998 super (parent );
@@ -133,7 +172,6 @@ final public void addLong(long value) {
133172 final public void addFloat (float value ) {
134173 parent .addFloat (value );
135174 }
136-
137175 }
138176
139177 static final class FieldDoubleConverter extends AvroPrimitiveConverter {
@@ -162,62 +200,84 @@ final public void addDouble(double value) {
162200 }
163201 }
164202
165- static final class FieldByteArrayConverter extends AvroPrimitiveConverter {
203+ static final class FieldByteArrayConverter extends BinaryConverter < byte []> {
166204 public FieldByteArrayConverter (ParentValueContainer parent ) {
167205 super (parent );
168206 }
169207
170208 @ Override
171- final public void addBinary (Binary value ) {
172- parent . add ( value . getBytes () );
209+ public byte [] convert (Binary binary ) {
210+ return binary . getBytes ();
173211 }
174212 }
175213
176- static final class FieldByteBufferConverter extends AvroPrimitiveConverter {
214+ static final class FieldByteBufferConverter extends BinaryConverter < ByteBuffer > {
177215 public FieldByteBufferConverter (ParentValueContainer parent ) {
178216 super (parent );
179217 }
180218
181219 @ Override
182- final public void addBinary (Binary value ) {
183- parent . add ( ByteBuffer .wrap (value .getBytes () ));
220+ public ByteBuffer convert (Binary binary ) {
221+ return ByteBuffer .wrap (binary .getBytes ());
184222 }
185223 }
186224
187- static final class FieldStringConverter extends AvroPrimitiveConverter {
188- // TODO: dictionary support should be generic and provided by a parent
189- // TODO: this always produces strings, but should respect avro.java.string
190- private String [] dict ;
191-
225+ static final class FieldStringConverter extends BinaryConverter <String > {
192226 public FieldStringConverter (ParentValueContainer parent ) {
193227 super (parent );
194228 }
195229
196230 @ Override
197- final public void addBinary (Binary value ) {
198- parent . add ( value . toStringUsingUTF8 () );
231+ public String convert (Binary binary ) {
232+ return binary . toStringUsingUTF8 ();
199233 }
234+ }
200235
201- @ Override
202- public boolean hasDictionarySupport ( ) {
203- return true ;
236+ static final class FieldUTF8Converter extends BinaryConverter < Utf8 > {
237+ public FieldUTF8Converter ( ParentValueContainer parent ) {
238+ super ( parent ) ;
204239 }
205240
206241 @ Override
207- public void setDictionary (Dictionary dictionary ) {
208- dict = new String [dictionary .getMaxId () + 1 ];
209- for (int i = 0 ; i <= dictionary .getMaxId (); i ++) {
210- dict [i ] = dictionary .decodeToBinary (i ).toStringUsingUTF8 ();
242+ public Utf8 convert (Binary binary ) {
243+ return new Utf8 (binary .getBytes ());
244+ }
245+ }
246+
247+ static final class FieldStringableConverter extends BinaryConverter <Object > {
248+ private final String stringableName ;
249+ private final Constructor <?> ctor ;
250+
251+ public FieldStringableConverter (ParentValueContainer parent ,
252+ Class <?> stringableClass ) {
253+ super (parent );
254+ stringableName = stringableClass .getName ();
255+ try {
256+ this .ctor = stringableClass .getConstructor (String .class );
257+ } catch (NoSuchMethodException e ) {
258+ throw new ParquetDecodingException (
259+ "Unable to get String constructor for " + stringableName , e );
211260 }
212261 }
213262
214263 @ Override
215- public void addValueFromDictionary (int dictionaryId ) {
216- parent .add (dict [dictionaryId ]);
264+ public Object convert (Binary binary ) {
265+ try {
266+ return ctor .newInstance (binary .toStringUsingUTF8 ());
267+ } catch (InstantiationException e ) {
268+ throw new ParquetDecodingException (
269+ "Cannot convert binary to " + stringableName , e );
270+ } catch (IllegalAccessException e ) {
271+ throw new ParquetDecodingException (
272+ "Cannot convert binary to " + stringableName , e );
273+ } catch (InvocationTargetException e ) {
274+ throw new ParquetDecodingException (
275+ "Cannot convert binary to " + stringableName , e );
276+ }
217277 }
218278 }
219279
220- static final class FieldEnumConverter extends AvroPrimitiveConverter {
280+ static final class FieldEnumConverter extends BinaryConverter < Object > {
221281 private final Schema schema ;
222282 private final GenericData model ;
223283
@@ -229,12 +289,12 @@ public FieldEnumConverter(ParentValueContainer parent, Schema enumSchema,
229289 }
230290
231291 @ Override
232- final public void addBinary (Binary value ) {
233- parent . add ( model .createEnum (value .toStringUsingUTF8 (), schema ) );
292+ public Object convert (Binary binary ) {
293+ return model .createEnum (binary .toStringUsingUTF8 (), schema );
234294 }
235295 }
236296
237- static final class FieldFixedConverter extends AvroPrimitiveConverter {
297+ static final class FieldFixedConverter extends BinaryConverter < Object > {
238298 private final Schema schema ;
239299 private final GenericData model ;
240300
@@ -246,8 +306,8 @@ public FieldFixedConverter(ParentValueContainer parent, Schema avroSchema,
246306 }
247307
248308 @ Override
249- final public void addBinary (Binary value ) {
250- parent . add ( model .createFixed (null /* reuse */ , value .getBytes (), schema ) );
309+ public Object convert (Binary binary ) {
310+ return model .createFixed (null /* reuse */ , binary .getBytes (), schema );
251311 }
252312 }
253313}
0 commit comments