org.apache.parquet.column.Encoding.getValuesReader(): usage and code examples

This article collects Java code examples for the org.apache.parquet.column.Encoding.getValuesReader() method and shows how it is used in practice. The examples come from selected open-source projects found on platforms such as GitHub, Stack Overflow, and Maven, and should be useful as references. Details of the method:

Package: org.apache.parquet.column
Class: Encoding
Method: getValuesReader

About Encoding.getValuesReader

From the Javadoc: to read decoded values that don't require a dictionary.
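
Before the project excerpts, a minimal sketch of the typical call pattern may help: choose an encoding that does not require a dictionary (PLAIN, for example), request a ValuesReader for the page section you want (ValuesType.VALUES, REPETITION_LEVEL, or DEFINITION_LEVEL), initialize it from the page bytes, and then pull values. The readPlainLongs helper below and its descriptor, pageBytes, and valueCount parameters are illustrative assumptions, not code from any of the quoted projects; an INT64 column is also assumed.

import java.io.IOException;
import org.apache.parquet.bytes.ByteBufferInputStream;
import org.apache.parquet.bytes.BytesInput;
import org.apache.parquet.column.ColumnDescriptor;
import org.apache.parquet.column.Encoding;
import org.apache.parquet.column.ValuesType;
import org.apache.parquet.column.values.ValuesReader;

// Hypothetical helper: in real code, descriptor comes from the file schema
// and pageBytes from a decompressed v1 data page.
static void readPlainLongs(ColumnDescriptor descriptor, BytesInput pageBytes, int valueCount)
  throws IOException {
 // PLAIN never uses a dictionary, so getValuesReader() is the right factory;
 // dictionary encodings require getDictionaryBasedValuesReader() instead.
 ValuesReader reader = Encoding.PLAIN.getValuesReader(descriptor, ValuesType.VALUES);
 ByteBufferInputStream in = pageBytes.toInputStream();
 reader.initFromPage(valueCount, in);
 for (int i = 0; i < valueCount; i++) {
  long v = reader.readLong(); // assumes an INT64 column
  // ... consume v
 }
}

The excerpts below repeat this pattern in production readers: repetition and definition levels are read first with readers obtained for ValuesType.REPETITION_LEVEL and ValuesType.DEFINITION_LEVEL, and the data section is read last, switching to getDictionaryBasedValuesReader() whenever dataEncoding.usesDictionary() reports a dictionary-encoded page.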

Code examples

Code example source: apache/hive

private void readPageV1(DataPageV1 page) {
 ValuesReader rlReader = page.getRlEncoding().getValuesReader(descriptor, REPETITION_LEVEL);
 ValuesReader dlReader = page.getDlEncoding().getValuesReader(descriptor, DEFINITION_LEVEL);
 this.repetitionLevelColumn = new ValuesReaderIntIterator(rlReader);
 this.definitionLevelColumn = new ValuesReaderIntIterator(dlReader);
 try {
  BytesInput bytes = page.getBytes();
  LOG.debug("page size " + bytes.size() + " bytes and " + pageValueCount + " records");
  ByteBufferInputStream in = bytes.toInputStream();
  LOG.debug("reading repetition levels at " + in.position());
  rlReader.initFromPage(pageValueCount, in);
  LOG.debug("reading definition levels at " + in.position());
  dlReader.initFromPage(pageValueCount, in);
  LOG.debug("reading data at " + in.position());
  initDataReader(page.getValueEncoding(), in, page.getValueCount());
 } catch (IOException e) {
  throw new ParquetDecodingException("could not read page " + page + " in col " + descriptor, e);
 }
}

Code example source: apache/hive

private void initDataReader(Encoding dataEncoding, ByteBufferInputStream in, int valueCount)
  throws IOException {
 this.pageValueCount = valueCount;
 this.endOfPageValueCount = valuesRead + pageValueCount;
 if (dataEncoding.usesDictionary()) {
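  // Dictionary-encoded page: getValuesReader() cannot be used here, so the
  // reader is built from the previously loaded page dictionary instead.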
  this.dataColumn = null;
  if (dictionary == null) {
   throw new IOException(
     "could not read page in col " + descriptor +
       " as the dictionary was missing for encoding " + dataEncoding);
  }
  dataColumn = ParquetDataColumnReaderFactory.getDataColumnReaderByType(type.asPrimitiveType(), hiveType,
    dataEncoding.getDictionaryBasedValuesReader(descriptor, VALUES, dictionary
      .getDictionary()), skipTimestampConversion);
  this.isCurrentPageDictionaryEncoded = true;
 } else {
  dataColumn = ParquetDataColumnReaderFactory.getDataColumnReaderByType(type.asPrimitiveType(), hiveType,
    dataEncoding.getValuesReader(descriptor, VALUES), skipTimestampConversion);
  this.isCurrentPageDictionaryEncoded = false;
 }
 try {
  dataColumn.initFromPage(pageValueCount, in);
 } catch (IOException e) {
  throw new IOException("could not read page in col " + descriptor, e);
 }
}

Code example source: org.apache.spark/spark-sql_2.11

private void readPageV1(DataPageV1 page) throws IOException {
 this.pageValueCount = page.getValueCount();
 ValuesReader rlReader = page.getRlEncoding().getValuesReader(descriptor, REPETITION_LEVEL);
 ValuesReader dlReader;
 // Initialize the decoders.
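 // Definition levels must be RLE-encoded (or the column must have max
 // definition level 0, i.e. no nulls possible) for this vectorized path.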
 if (page.getDlEncoding() != Encoding.RLE && descriptor.getMaxDefinitionLevel() != 0) {
  throw new UnsupportedOperationException("Unsupported encoding: " + page.getDlEncoding());
 }
 int bitWidth = BytesUtils.getWidthFromMaxInt(descriptor.getMaxDefinitionLevel());
 this.defColumn = new VectorizedRleValuesReader(bitWidth);
 dlReader = this.defColumn;
 this.repetitionLevelColumn = new ValuesReaderIntIterator(rlReader);
 this.definitionLevelColumn = new ValuesReaderIntIterator(dlReader);
 try {
  BytesInput bytes = page.getBytes();
  ByteBufferInputStream in = bytes.toInputStream();
  rlReader.initFromPage(pageValueCount, in);
  dlReader.initFromPage(pageValueCount, in);
  initDataReader(page.getValueEncoding(), in);
 } catch (IOException e) {
  throw new IOException("could not read page " + page + " in col " + descriptor, e);
 }
}

Code example source: org.apache.spark/spark-sql

private void readPageV1(DataPageV1 page) throws IOException {
 this.pageValueCount = page.getValueCount();
 ValuesReader rlReader = page.getRlEncoding().getValuesReader(descriptor, REPETITION_LEVEL);
 ValuesReader dlReader;
 // Initialize the decoders.
 if (page.getDlEncoding() != Encoding.RLE && descriptor.getMaxDefinitionLevel() != 0) {
  throw new UnsupportedOperationException("Unsupported encoding: " + page.getDlEncoding());
 }
 int bitWidth = BytesUtils.getWidthFromMaxInt(descriptor.getMaxDefinitionLevel());
 this.defColumn = new VectorizedRleValuesReader(bitWidth);
 dlReader = this.defColumn;
 this.repetitionLevelColumn = new ValuesReaderIntIterator(rlReader);
 this.definitionLevelColumn = new ValuesReaderIntIterator(dlReader);
 try {
  BytesInput bytes = page.getBytes();
  ByteBufferInputStream in = bytes.toInputStream();
  rlReader.initFromPage(pageValueCount, in);
  dlReader.initFromPage(pageValueCount, in);
  initDataReader(page.getValueEncoding(), in);
 } catch (IOException e) {
  throw new IOException("could not read page " + page + " in col " + descriptor, e);
 }
}

Code example source: org.apache.spark/spark-sql_2.10

private void readPageV1(DataPageV1 page) throws IOException {
 this.pageValueCount = page.getValueCount();
 ValuesReader rlReader = page.getRlEncoding().getValuesReader(descriptor, REPETITION_LEVEL);
 ValuesReader dlReader;
 // Initialize the decoders.
 if (page.getDlEncoding() != Encoding.RLE && descriptor.getMaxDefinitionLevel() != 0) {
  throw new UnsupportedOperationException("Unsupported encoding: " + page.getDlEncoding());
 }
 int bitWidth = BytesUtils.getWidthFromMaxInt(descriptor.getMaxDefinitionLevel());
 this.defColumn = new VectorizedRleValuesReader(bitWidth);
 dlReader = this.defColumn;
 this.repetitionLevelColumn = new ValuesReaderIntIterator(rlReader);
 this.definitionLevelColumn = new ValuesReaderIntIterator(dlReader);
 try {
  byte[] bytes = page.getBytes().toByteArray();
  rlReader.initFromPage(pageValueCount, bytes, 0);
  int next = rlReader.getNextOffset();
  dlReader.initFromPage(pageValueCount, bytes, next);
  next = dlReader.getNextOffset();
  initDataReader(page.getValueEncoding(), bytes, next);
 } catch (IOException e) {
  throw new IOException("could not read page " + page + " in col " + descriptor, e);
 }
}

Code example source: org.apache.parquet/parquet-column

private void readPageV1(DataPageV1 page) {
 ValuesReader rlReader = page.getRlEncoding().getValuesReader(path, REPETITION_LEVEL);
 ValuesReader dlReader = page.getDlEncoding().getValuesReader(path, DEFINITION_LEVEL);
 this.repetitionLevelColumn = new ValuesReaderIntIterator(rlReader);
 this.definitionLevelColumn = new ValuesReaderIntIterator(dlReader);
 try {
  BytesInput bytes = page.getBytes();
  LOG.debug("page size {} bytes and {} records", bytes.size(), pageValueCount);
  LOG.debug("reading repetition levels at 0");
  ByteBufferInputStream in = bytes.toInputStream();
  rlReader.initFromPage(pageValueCount, in);
  LOG.debug("reading definition levels at {}", in.position());
  dlReader.initFromPage(pageValueCount, in);
  LOG.debug("reading data at {}", in.position());
  initDataReader(page.getValueEncoding(), in, page.getValueCount());
 } catch (IOException e) {
  throw new ParquetDecodingException("could not read page " + page + " in col " + path, e);
 }
}

Code example source: org.lasersonlab.apache.parquet/parquet-column

private void readPageV1(DataPageV1 page) {
 ValuesReader rlReader = page.getRlEncoding().getValuesReader(path, REPETITION_LEVEL);
 ValuesReader dlReader = page.getDlEncoding().getValuesReader(path, DEFINITION_LEVEL);
 this.repetitionLevelColumn = new ValuesReaderIntIterator(rlReader);
 this.definitionLevelColumn = new ValuesReaderIntIterator(dlReader);
 int valueCount = page.getValueCount();
 try {
  BytesInput bytes = page.getBytes();
  LOG.debug("page size {} bytes and {} values", bytes.size(), valueCount);
  LOG.debug("reading repetition levels at 0");
  ByteBufferInputStream in = bytes.toInputStream();
  rlReader.initFromPage(valueCount, in);
  LOG.debug("reading definition levels at {}", in.position());
  dlReader.initFromPage(valueCount, in);
  LOG.debug("reading data at {}", in.position());
  initDataReader(page.getValueEncoding(), in, valueCount);
 } catch (IOException e) {
  throw new ParquetDecodingException("could not read page " + page + " in col " + path, e);
 }
 newPageInitialized(page);
}

Code example source: Netflix/iceberg

private void initFromPage(DataPageV1 page) {
 this.triplesCount = page.getValueCount();
 ValuesReader rlReader = page.getRlEncoding().getValuesReader(desc, REPETITION_LEVEL);
 ValuesReader dlReader = page.getDlEncoding().getValuesReader(desc, DEFINITION_LEVEL);
 this.repetitionLevels = new ValuesReaderIntIterator(rlReader);
 this.definitionLevels = new ValuesReaderIntIterator(dlReader);
 try {
  BytesInput bytes = page.getBytes();
  LOG.debug("page size {} bytes and {} records", bytes.size(), triplesCount);
  LOG.debug("reading repetition levels at 0");
  ByteBufferInputStream in = bytes.toInputStream();
  rlReader.initFromPage(triplesCount, in);
  LOG.debug("reading definition levels at {}", in.position());
  dlReader.initFromPage(triplesCount, in);
  LOG.debug("reading data at {}", in.position());
  initDataReader(page.getValueEncoding(), in, page.getValueCount());
 } catch (IOException e) {
  throw new ParquetDecodingException("could not read page " + page + " in col " + desc, e);
 }
}

Code example source: com.alibaba.blink/flink-table

private void readPageV1(DataPageV1 page) {
  this.pageValueCount = page.getValueCount();
  ValuesReader rlReader = page.getRlEncoding().getValuesReader(descriptor, REPETITION_LEVEL);
  // Initialize the decoders.
  if (page.getDlEncoding() != Encoding.RLE && descriptor.getMaxDefinitionLevel() != 0) {
    throw new UnsupportedOperationException("Unsupported encoding: " + page.getDlEncoding());
  }
  int bitWidth = BytesUtils.getWidthFromMaxInt(descriptor.getMaxDefinitionLevel());
  this.defColumn = new VectorizedDefValuesReader(bitWidth);
  ValuesReader dlReader = this.defColumn;
  try {
    byte[] bytes = page.getBytes().toByteArray();
    LOG.debug("page size " + bytes.length + " bytes and " + pageValueCount + " records");
    LOG.debug("reading repetition levels at 0");
    rlReader.initFromPage(pageValueCount, bytes, 0);
    int next = rlReader.getNextOffset();
    LOG.debug("reading definition levels at " + next);
    dlReader.initFromPage(pageValueCount, bytes, next);
    next = dlReader.getNextOffset();
    LOG.debug("reading data at " + next);
    initDataReader(page.getValueEncoding(), bytes, next);
  } catch (IOException e) {
    throw new ParquetDecodingException("could not read page " + page + " in col " + descriptor, e);
  }
}

Code example source: Netflix/iceberg

this.values = dataEncoding.getValuesReader(desc, VALUES);

Code example source: org.apache.drill.exec/drill-java-exec

repetitionLevels = rlEncoding.getValuesReader(parentColumnReader.columnDescriptor, ValuesType.REPETITION_LEVEL);
repetitionLevels.initFromPage(currentPageCount, in);
repetitionLevels.readInteger();
definitionLevels = dlEncoding.getValuesReader(parentColumnReader.columnDescriptor, ValuesType.DEFINITION_LEVEL);
parentColumnReader.currDefLevel = -1;

Code example source: io.snappydata/snappy-spark-sql

private void readPageV1(DataPageV1 page) throws IOException {
 this.pageValueCount = page.getValueCount();
 ValuesReader rlReader = page.getRlEncoding().getValuesReader(descriptor, REPETITION_LEVEL);
 ValuesReader dlReader;
 // Initialize the decoders.
 if (page.getDlEncoding() != Encoding.RLE && descriptor.getMaxDefinitionLevel() != 0) {
  throw new UnsupportedOperationException("Unsupported encoding: " + page.getDlEncoding());
 }
 int bitWidth = BytesUtils.getWidthFromMaxInt(descriptor.getMaxDefinitionLevel());
 this.defColumn = new VectorizedRleValuesReader(bitWidth);
 dlReader = this.defColumn;
 this.repetitionLevelColumn = new ValuesReaderIntIterator(rlReader);
 this.definitionLevelColumn = new ValuesReaderIntIterator(dlReader);
 try {
  byte[] bytes = page.getBytes().toByteArray();
  rlReader.initFromPage(pageValueCount, bytes, 0);
  int next = rlReader.getNextOffset();
  dlReader.initFromPage(pageValueCount, bytes, next);
  next = dlReader.getNextOffset();
  initDataReader(page.getValueEncoding(), bytes, next);
 } catch (IOException e) {
  throw new IOException("could not read page " + page + " in col " + descriptor, e);
 }
}

Code example source: org.apache.drill.exec/drill-java-exec

repetitionLevels = rlEncoding.getValuesReader(parentColumnReader.columnDescriptor, ValuesType.REPETITION_LEVEL);
repetitionLevels.initFromPage(currentPageCount, in);
definitionLevels = dlEncoding.getValuesReader(parentColumnReader.columnDescriptor, ValuesType.DEFINITION_LEVEL);
definitionLevels.initFromPage(currentPageCount, in);
readPosInBytes = in.position();
if (!valueEncoding.usesDictionary()) {
 valueReader = valueEncoding.getValuesReader(parentColumnReader.columnDescriptor, ValuesType.VALUES);
 valueReader.initFromPage(currentPageCount, in);
}

Code example source: dremio/dremio-oss

repetitionLevels = rlEncoding.getValuesReader(parentColumnReader.columnDescriptor, ValuesType.REPETITION_LEVEL);
repetitionLevels.initFromPage(currentPageCount, pageDataBuffer, (int) readPosInBytes);
definitionLevels = dlEncoding.getValuesReader(parentColumnReader.columnDescriptor, ValuesType.DEFINITION_LEVEL);
definitionLevels.initFromPage(currentPageCount, pageDataBuffer, (int) readPosInBytes);
readPosInBytes = definitionLevels.getNextOffset();
if (!valueEncoding.usesDictionary()) {
 valueReader = valueEncoding.getValuesReader(parentColumnReader.columnDescriptor, ValuesType.VALUES);
 valueReader.initFromPage(currentPageCount, pageDataBuffer, (int) readPosInBytes);
}

Code example source: org.apache.parquet/parquet-column

private void initDataReader(Encoding dataEncoding, ByteBufferInputStream in, int valueCount) {
 ValuesReader previousReader = this.dataColumn;
 this.currentEncoding = dataEncoding;
 this.pageValueCount = valueCount;
 this.endOfPageValueCount = readValues + pageValueCount;
 if (dataEncoding.usesDictionary()) {
  if (dictionary == null) {
   throw new ParquetDecodingException(
     "could not read page in col " + path + " as the dictionary was missing for encoding " + dataEncoding);
  }
  this.dataColumn = dataEncoding.getDictionaryBasedValuesReader(path, VALUES, dictionary);
 } else {
  this.dataColumn = dataEncoding.getValuesReader(path, VALUES);
 }
 if (dataEncoding.usesDictionary() && converter.hasDictionarySupport()) {
  bindToDictionary(dictionary);
 } else {
  bind(path.getType());
 }
 try {
  dataColumn.initFromPage(pageValueCount, in);
 } catch (IOException e) {
  throw new ParquetDecodingException("could not read page in col " + path, e);
 }
 if (CorruptDeltaByteArrays.requiresSequentialReads(writerVersion, dataEncoding) &&
   previousReader != null && previousReader instanceof RequiresPreviousReader) {
  // previous reader can only be set if reading sequentially
  ((RequiresPreviousReader) dataColumn).setPreviousReader(previousReader);
 }
}

Code example source: org.lasersonlab.apache.parquet/parquet-column

private void initDataReader(Encoding dataEncoding, ByteBufferInputStream in, int valueCount) {
 ValuesReader previousReader = this.dataColumn;
 this.currentEncoding = dataEncoding;
 this.pageValueCount = valueCount;
 this.endOfPageValueCount = readValues + pageValueCount;
 if (dataEncoding.usesDictionary()) {
  if (dictionary == null) {
   throw new ParquetDecodingException(
     "could not read page in col " + path + " as the dictionary was missing for encoding " + dataEncoding);
  }
  this.dataColumn = dataEncoding.getDictionaryBasedValuesReader(path, VALUES, dictionary);
 } else {
  this.dataColumn = dataEncoding.getValuesReader(path, VALUES);
 }
 if (dataEncoding.usesDictionary() && converter.hasDictionarySupport()) {
  bindToDictionary(dictionary);
 } else {
  bind(path.getType());
 }
 try {
  dataColumn.initFromPage(pageValueCount, in);
 } catch (IOException e) {
  throw new ParquetDecodingException("could not read page in col " + path, e);
 }
 if (CorruptDeltaByteArrays.requiresSequentialReads(writerVersion, dataEncoding) &&
   previousReader != null && previousReader instanceof RequiresPreviousReader) {
  // previous reader can only be set if reading sequentially
  ((RequiresPreviousReader) dataColumn).setPreviousReader(previousReader);
 }
}
