表中有两列(name 为 string 类型,salary 为 decimal(10,3) 类型),并以 Parquet 格式存储在 Hive 中。使用 morphline 和 Solr 建立索引时,出现以下异常:
ERROR morphline.MorphlineMapRunner: Unable to process file <parquet file>
java.lang.ClassCastException: org.apache.avro.generic.GenericData$Record cannot be cast to java.io.InputStream
at org.kitesdk.morphline.stdio.AbstractParser.getAttachmentInputStream(AbstractParser.java:184)
at org.kitesdk.morphline.stdio.AbstractParser.doProcess(AbstractParser.java:94)
at org.kitesdk.morphline.base.AbstractCommand.process(AbstractCommand.java:161)
at org.kitesdk.morphline.base.Connector.process(Connector.java:64)
at org.kitesdk.morphline.base.AbstractCommand.doProcess(AbstractCommand.java:186)
at org.kitesdk.morphline.stdlib.ConvertTimestampBuilder$ConvertTimestamp.doProcess(ConvertTimestampBuilder.java:161)
at org.kitesdk.morphline.base.AbstractCommand.process(AbstractCommand.java:161)
at org.kitesdk.morphline.base.Connector.process(Connector.java:64)
at org.kitesdk.morphline.base.AbstractCommand.doProcess(AbstractCommand.java:186)
at org.kitesdk.morphline.stdlib.GenerateUUIDBuilder$GenerateUUID.doProcess(GenerateUUIDBuilder.java:98)
at org.kitesdk.morphline.base.AbstractCommand.process(AbstractCommand.java:161)
at org.kitesdk.morphline.base.Connector.process(Connector.java:64)
at org.kitesdk.morphline.avro.ExtractAvroPathsBuilder$ExtractAvroPaths.doProcess(ExtractAvroPathsBuilder.java:143)
at org.kitesdk.morphline.base.AbstractCommand.process(AbstractCommand.java:161)
at org.kitesdk.morphline.base.Connector.process(Connector.java:64)
at org.kitesdk.morphline.hadoop.parquet.avro.ReadAvroParquetFileBuilder$ReadAvroParquetFile.extract(ReadAvroParquetFileBuilder.java:201)
at org.kitesdk.morphline.hadoop.parquet.avro.ReadAvroParquetFileBuilder$ReadAvroParquetFile.doProcess(ReadAvroParquetFileBuilder.java:180)
at org.kitesdk.morphline.base.AbstractCommand.process(AbstractCommand.java:161)
at org.kitesdk.morphline.base.AbstractCommand.doProcess(AbstractCommand.java:186)
at org.kitesdk.morphline.base.AbstractCommand.process(AbstractCommand.java:161)
at org.apache.solr.hadoop.morphline.MorphlineMapRunner.map(MorphlineMapRunner.java:208)
at org.apache.solr.hadoop.MapReduceIndexerTool.dryRun(MapReduceIndexerTool.java:1250)
at org.apache.solr.hadoop.MapReduceIndexerTool.run(MapReduceIndexerTool.java:875)
at org.apache.solr.hadoop.MapReduceIndexerTool.run(MapReduceIndexerTool.java:700)
at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:70)
at org.apache.solr.hadoop.MapReduceIndexerTool.main(MapReduceIndexerTool.java:687)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:497)
at org.apache.hadoop.util.RunJar.run(RunJar.java:221)
at org.apache.hadoop.util.RunJar.main(RunJar.java:136)
Exception in thread "main" org.kitesdk.morphline.api.MorphlineRuntimeException: java.lang.ClassCastException: org.apache.avro.generic.GenericData$Record cannot be cast to java.io.InputStream
at org.kitesdk.morphline.base.FaultTolerance.handleException(FaultTolerance.java:73)
at org.apache.solr.hadoop.morphline.MorphlineMapRunner.map(MorphlineMapRunner.java:220)
at org.apache.solr.hadoop.MapReduceIndexerTool.dryRun(MapReduceIndexerTool.java:1250)
at org.apache.solr.hadoop.MapReduceIndexerTool.run(MapReduceIndexerTool.java:875)
at org.apache.solr.hadoop.MapReduceIndexerTool.run(MapReduceIndexerTool.java:700)
at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:70)
at org.apache.solr.hadoop.MapReduceIndexerTool.main(MapReduceIndexerTool.java:687)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:497)
at org.apache.hadoop.util.RunJar.run(RunJar.java:221)
at org.apache.hadoop.util.RunJar.main(RunJar.java:136)
Caused by: java.lang.ClassCastException: org.apache.avro.generic.GenericData$Record cannot be cast to java.io.InputStream
at org.kitesdk.morphline.stdio.AbstractParser.getAttachmentInputStream(AbstractParser.java:184)
at org.kitesdk.morphline.stdio.AbstractParser.doProcess(AbstractParser.java:94)
at org.kitesdk.morphline.base.AbstractCommand.process(AbstractCommand.java:161)
at org.kitesdk.morphline.base.Connector.process(Connector.java:64)
at org.kitesdk.morphline.base.AbstractCommand.doProcess(AbstractCommand.java:186)
at org.kitesdk.morphline.stdlib.ConvertTimestampBuilder$ConvertTimestamp.doProcess(ConvertTimestampBuilder.java:161)
at org.kitesdk.morphline.base.AbstractCommand.process(AbstractCommand.java:161)
at org.kitesdk.morphline.base.Connector.process(Connector.java:64)
at org.kitesdk.morphline.base.AbstractCommand.doProcess(AbstractCommand.java:186)
at org.kitesdk.morphline.stdlib.GenerateUUIDBuilder$GenerateUUID.doProcess(GenerateUUIDBuilder.java:98)
at org.kitesdk.morphline.base.AbstractCommand.process(AbstractCommand.java:161)
at org.kitesdk.morphline.base.Connector.process(Connector.java:64)
at org.kitesdk.morphline.avro.ExtractAvroPathsBuilder$ExtractAvroPaths.doProcess(ExtractAvroPathsBuilder.java:143)
at org.kitesdk.morphline.base.AbstractCommand.process(AbstractCommand.java:161)
at org.kitesdk.morphline.base.Connector.process(Connector.java:64)
at org.kitesdk.morphline.hadoop.parquet.avro.ReadAvroParquetFileBuilder$ReadAvroParquetFile.extract(ReadAvroParquetFileBuilder.java:201)
at org.kitesdk.morphline.hadoop.parquet.avro.ReadAvroParquetFileBuilder$ReadAvroParquetFile.doProcess(ReadAvroParquetFileBuilder.java:180)
at org.kitesdk.morphline.base.AbstractCommand.process(AbstractCommand.java:161)
at org.kitesdk.morphline.base.AbstractCommand.doProcess(AbstractCommand.java:186)
at org.kitesdk.morphline.base.AbstractCommand.process(AbstractCommand.java:161)
at org.apache.solr.hadoop.morphline.MorphlineMapRunner.map(MorphlineMapRunner.java:208)
... 11 more
morphline 文件的内容如下:
{
readAvroParquetFile {
readerSchemaString:"""{"type":"record","name":"employee","fields":[
{"name":"name","type":["string","null"],"default":""},
{"name": "salary","type":
["bytes","null"],"logicalType":"decimal","precision":10,"scale":4,"default":0 }
]}"""
}
}
如何使用 morphline 和 Solr 为包含 decimal 列的表的 Parquet 文件建立索引?希望能得到帮助。
1条答案
按热度按时间pnwntuvh1#
根据 http://kitesdk.org/docs/current/morphlines/morphlines-reference-guide.html#readavroparquetfile :“morphline 记录的输入字段 file_upload_url 必须包含要读取的 Parquet 文件的 HDFS 路径(MapReduceIndexerTool 已经提供了该字段)。”