java—当将json转换为avro文件时,只有3/4的行被转换,我做错了什么?

jvlzgdj9  于 2021-06-01  发布在  Hadoop
关注(0)|答案(0)|浏览(211)

这是我的代码:

package hadoopPlayground;

import java.io.BufferedReader;

import org.apache.avro.Schema;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.Decoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.Encoder;
import org.apache.avro.io.EncoderFactory;
import org.apache.commons.io.IOUtils;

public class json2AVRO {
    public static void main( String[] args ) throws Exception
    {
    String filename = "ds214_arrivi_mensili.json";
    File JSONFile = new File(filename);
    String filename2 = "ds214_arrivi_mensili.avsc";
    File AVSCFile = new File(filename2);

    BufferedReader read = new BufferedReader(new FileReader(JSONFile));
    BufferedReader read2 = new BufferedReader(new FileReader(AVSCFile));

    String outputName = JSONFile.toString().substring(0, 
            JSONFile.toString().lastIndexOf(".")) + ".avro"; 

    String json = org.apache.commons.io.IOUtils.toString(read);
    String schema = org.apache.commons.io.IOUtils.toString(read2);

    InputStream input;
    Encoder encoder;
    ByteArrayOutputStream output;

            Schema schema1 = new Schema.Parser().parse(AVSCFile);

            DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>(schema1);

            input = new ByteArrayInputStream(json.getBytes());

            output = new ByteArrayOutputStream();

            DataInputStream din = new DataInputStream(input);

            Decoder decoder = DecoderFactory.get().jsonDecoder(schema1, din);

            System.out.println(decoder);

            encoder = EncoderFactory.get().binaryEncoder(output, null);
            GenericRecord datum;

            GenericDatumWriter<GenericRecord> writer1 = new GenericDatumWriter<GenericRecord>(schema1);

            File file= new File(outputName);

            DataFileWriter<GenericRecord> dataWriter = new DataFileWriter<GenericRecord>(writer1);

            dataWriter.create(schema1, file);
            try
            {
            for (int i = 0; i < json.length(); i++) {
                            datum = reader.read(null, decoder);
                            dataWriter.append(datum);

                            System.out.println(datum);

                            output.close();
                        }                   
            }
                    catch (IOException e)
                    {
            }
            finally
            {
                //Here is the flushing and closing
                try
                {
                    if (encoder != null)
                    {
                        encoder.flush();
                    }
                    if (output != null)
                    {
                        output.close();
                    }
                } catch (IOException e)
                {
                    throw new RuntimeException(e);
                }
            }

            }
        }

文件被正确地转换为avro(显然),但是当我尝试从终端将avro转换为json时,它只显示了1692行,超出了预期的长度。。。怎么了?
我已经检查了我的json,并将json从终端转换为avro,我没有任何问题。在1692行没有奇怪的符号。

暂无答案!

目前还没有任何答案,快来回答吧!

相关问题