This article collects Java code examples for the org.apache.parquet.column.Encoding.getValuesReader() method, showing how Encoding.getValuesReader() is used in practice. The examples are extracted from selected open-source projects hosted on platforms such as GitHub, Stack Overflow, and Maven, so they carry real-world reference value and should prove helpful. Details of the Encoding.getValuesReader() method:

Package path: org.apache.parquet.column.Encoding
Class name: Encoding
Method name: getValuesReader
Method description: To read decoded values that don't require a dictionary
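
Before turning to the project examples, here is a minimal, self-contained sketch of how the readers returned by getValuesReader() are typically driven over a v1 data page. This is an illustration, not code from any of the projects below: it assumes parquet-column 1.10+ (where ValuesReader.initFromPage() accepts a ByteBufferInputStream), a column of primitive type INT64, and a value encoding that does not use a dictionary (the dictionary case goes through getDictionaryBasedValuesReader() instead, as the Hive and parquet-column examples show); the class name GetValuesReaderSketch and method readPage are made up for the sketch.

import java.io.IOException;

import org.apache.parquet.bytes.ByteBufferInputStream;
import org.apache.parquet.column.ColumnDescriptor;
import org.apache.parquet.column.page.DataPageV1;
import org.apache.parquet.column.values.ValuesReader;

import static org.apache.parquet.column.ValuesType.DEFINITION_LEVEL;
import static org.apache.parquet.column.ValuesType.REPETITION_LEVEL;
import static org.apache.parquet.column.ValuesType.VALUES;

public class GetValuesReaderSketch {

  // Decodes the three sections of a v1 data page in their physical order:
  // repetition levels, then definition levels, then the values themselves.
  static void readPage(ColumnDescriptor descriptor, DataPageV1 page) throws IOException {
    int valueCount = page.getValueCount();

    // Each section carries its own encoding; getValuesReader() maps an
    // (encoding, column, section) triple to a concrete decoder.
    ValuesReader rlReader = page.getRlEncoding().getValuesReader(descriptor, REPETITION_LEVEL);
    ValuesReader dlReader = page.getDlEncoding().getValuesReader(descriptor, DEFINITION_LEVEL);
    ValuesReader dataReader = page.getValueEncoding().getValuesReader(descriptor, VALUES);

    // The readers consume one sequential stream over the page bytes, each
    // initFromPage() call picking up where the previous reader stopped.
    ByteBufferInputStream in = page.getBytes().toInputStream();
    rlReader.initFromPage(valueCount, in);
    dlReader.initFromPage(valueCount, in);
    dataReader.initFromPage(valueCount, in);

    for (int i = 0; i < valueCount; i++) {
      int repetitionLevel = rlReader.readInteger(); // 0 marks the start of a new record
      int definitionLevel = dlReader.readInteger();
      // Only non-null slots (definition level == max) have a stored value.
      if (definitionLevel == descriptor.getMaxDefinitionLevel()) {
        long value = dataReader.readLong(); // readLong() because we assumed INT64
        System.out.println(repetitionLevel + ", " + definitionLevel + ", " + value);
      }
    }
  }
}

The ordering is the one design constraint every example below shares: the initFromPage() calls must happen in page order, because the decoders advance a single shared stream (or, in the older byte[] API, pass explicit offsets via getNextOffset()).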
Code example source: apache/hive

private void readPageV1(DataPageV1 page) {
  ValuesReader rlReader = page.getRlEncoding().getValuesReader(descriptor, REPETITION_LEVEL);
  ValuesReader dlReader = page.getDlEncoding().getValuesReader(descriptor, DEFINITION_LEVEL);
  this.repetitionLevelColumn = new ValuesReaderIntIterator(rlReader);
  this.definitionLevelColumn = new ValuesReaderIntIterator(dlReader);
  try {
    BytesInput bytes = page.getBytes();
    LOG.debug("page size " + bytes.size() + " bytes and " + pageValueCount + " records");
    ByteBufferInputStream in = bytes.toInputStream();
    LOG.debug("reading repetition levels at " + in.position());
    rlReader.initFromPage(pageValueCount, in);
    LOG.debug("reading definition levels at " + in.position());
    dlReader.initFromPage(pageValueCount, in);
    LOG.debug("reading data at " + in.position());
    initDataReader(page.getValueEncoding(), in, page.getValueCount());
  } catch (IOException e) {
    throw new ParquetDecodingException("could not read page " + page + " in col " + descriptor, e);
  }
}

Code example source: apache/hive

private void initDataReader(Encoding dataEncoding, ByteBufferInputStream in, int valueCount)
    throws IOException {
  this.pageValueCount = valueCount;
  this.endOfPageValueCount = valuesRead + pageValueCount;
  if (dataEncoding.usesDictionary()) {
    this.dataColumn = null;
    if (dictionary == null) {
      throw new IOException(
          "could not read page in col " + descriptor +
          " as the dictionary was missing for encoding " + dataEncoding);
    }
    dataColumn = ParquetDataColumnReaderFactory.getDataColumnReaderByType(type.asPrimitiveType(), hiveType,
        dataEncoding.getDictionaryBasedValuesReader(descriptor, VALUES, dictionary.getDictionary()),
        skipTimestampConversion);
    this.isCurrentPageDictionaryEncoded = true;
  } else {
    dataColumn = ParquetDataColumnReaderFactory.getDataColumnReaderByType(type.asPrimitiveType(), hiveType,
        dataEncoding.getValuesReader(descriptor, VALUES), skipTimestampConversion);
    this.isCurrentPageDictionaryEncoded = false;
  }
  try {
    dataColumn.initFromPage(pageValueCount, in);
  } catch (IOException e) {
    throw new IOException("could not read page in col " + descriptor, e);
  }
}

Code example source: org.apache.spark/spark-sql_2.11

private void readPageV1(DataPageV1 page) throws IOException {
  this.pageValueCount = page.getValueCount();
  ValuesReader rlReader = page.getRlEncoding().getValuesReader(descriptor, REPETITION_LEVEL);
  ValuesReader dlReader;
  // Initialize the decoders.
  if (page.getDlEncoding() != Encoding.RLE && descriptor.getMaxDefinitionLevel() != 0) {
    throw new UnsupportedOperationException("Unsupported encoding: " + page.getDlEncoding());
  }
  int bitWidth = BytesUtils.getWidthFromMaxInt(descriptor.getMaxDefinitionLevel());
  this.defColumn = new VectorizedRleValuesReader(bitWidth);
  dlReader = this.defColumn;
  this.repetitionLevelColumn = new ValuesReaderIntIterator(rlReader);
  this.definitionLevelColumn = new ValuesReaderIntIterator(dlReader);
  try {
    BytesInput bytes = page.getBytes();
    ByteBufferInputStream in = bytes.toInputStream();
    rlReader.initFromPage(pageValueCount, in);
    dlReader.initFromPage(pageValueCount, in);
    initDataReader(page.getValueEncoding(), in);
  } catch (IOException e) {
    throw new IOException("could not read page " + page + " in col " + descriptor, e);
  }
}

Code example source: org.apache.spark/spark-sql

private void readPageV1(DataPageV1 page) throws IOException {
  this.pageValueCount = page.getValueCount();
  ValuesReader rlReader = page.getRlEncoding().getValuesReader(descriptor, REPETITION_LEVEL);
  ValuesReader dlReader;
  // Initialize the decoders.
  if (page.getDlEncoding() != Encoding.RLE && descriptor.getMaxDefinitionLevel() != 0) {
    throw new UnsupportedOperationException("Unsupported encoding: " + page.getDlEncoding());
  }
  int bitWidth = BytesUtils.getWidthFromMaxInt(descriptor.getMaxDefinitionLevel());
  this.defColumn = new VectorizedRleValuesReader(bitWidth);
  dlReader = this.defColumn;
  this.repetitionLevelColumn = new ValuesReaderIntIterator(rlReader);
  this.definitionLevelColumn = new ValuesReaderIntIterator(dlReader);
  try {
    BytesInput bytes = page.getBytes();
    ByteBufferInputStream in = bytes.toInputStream();
    rlReader.initFromPage(pageValueCount, in);
    dlReader.initFromPage(pageValueCount, in);
    initDataReader(page.getValueEncoding(), in);
  } catch (IOException e) {
    throw new IOException("could not read page " + page + " in col " + descriptor, e);
  }
}

Code example source: org.apache.spark/spark-sql_2.10

private void readPageV1(DataPageV1 page) throws IOException {
  this.pageValueCount = page.getValueCount();
  ValuesReader rlReader = page.getRlEncoding().getValuesReader(descriptor, REPETITION_LEVEL);
  ValuesReader dlReader;
  // Initialize the decoders.
  if (page.getDlEncoding() != Encoding.RLE && descriptor.getMaxDefinitionLevel() != 0) {
    throw new UnsupportedOperationException("Unsupported encoding: " + page.getDlEncoding());
  }
  int bitWidth = BytesUtils.getWidthFromMaxInt(descriptor.getMaxDefinitionLevel());
  this.defColumn = new VectorizedRleValuesReader(bitWidth);
  dlReader = this.defColumn;
  this.repetitionLevelColumn = new ValuesReaderIntIterator(rlReader);
  this.definitionLevelColumn = new ValuesReaderIntIterator(dlReader);
  try {
    byte[] bytes = page.getBytes().toByteArray();
    rlReader.initFromPage(pageValueCount, bytes, 0);
    int next = rlReader.getNextOffset();
    dlReader.initFromPage(pageValueCount, bytes, next);
    next = dlReader.getNextOffset();
    initDataReader(page.getValueEncoding(), bytes, next);
  } catch (IOException e) {
    throw new IOException("could not read page " + page + " in col " + descriptor, e);
  }
}

Code example source: org.apache.parquet/parquet-column

private void readPageV1(DataPageV1 page) {
  ValuesReader rlReader = page.getRlEncoding().getValuesReader(path, REPETITION_LEVEL);
  ValuesReader dlReader = page.getDlEncoding().getValuesReader(path, DEFINITION_LEVEL);
  this.repetitionLevelColumn = new ValuesReaderIntIterator(rlReader);
  this.definitionLevelColumn = new ValuesReaderIntIterator(dlReader);
  try {
    BytesInput bytes = page.getBytes();
    LOG.debug("page size {} bytes and {} records", bytes.size(), pageValueCount);
    LOG.debug("reading repetition levels at 0");
    ByteBufferInputStream in = bytes.toInputStream();
    rlReader.initFromPage(pageValueCount, in);
    LOG.debug("reading definition levels at {}", in.position());
    dlReader.initFromPage(pageValueCount, in);
    LOG.debug("reading data at {}", in.position());
    initDataReader(page.getValueEncoding(), in, page.getValueCount());
  } catch (IOException e) {
    throw new ParquetDecodingException("could not read page " + page + " in col " + path, e);
  }
}

Code example source: org.lasersonlab.apache.parquet/parquet-column

private void readPageV1(DataPageV1 page) {
  ValuesReader rlReader = page.getRlEncoding().getValuesReader(path, REPETITION_LEVEL);
  ValuesReader dlReader = page.getDlEncoding().getValuesReader(path, DEFINITION_LEVEL);
  this.repetitionLevelColumn = new ValuesReaderIntIterator(rlReader);
  this.definitionLevelColumn = new ValuesReaderIntIterator(dlReader);
  int valueCount = page.getValueCount();
  try {
    BytesInput bytes = page.getBytes();
    LOG.debug("page size {} bytes and {} values", bytes.size(), valueCount);
    LOG.debug("reading repetition levels at 0");
    ByteBufferInputStream in = bytes.toInputStream();
    rlReader.initFromPage(valueCount, in);
    LOG.debug("reading definition levels at {}", in.position());
    dlReader.initFromPage(valueCount, in);
    LOG.debug("reading data at {}", in.position());
    initDataReader(page.getValueEncoding(), in, valueCount);
  } catch (IOException e) {
    throw new ParquetDecodingException("could not read page " + page + " in col " + path, e);
  }
  newPageInitialized(page);
}

Code example source: Netflix/iceberg

private void initFromPage(DataPageV1 page) {
  this.triplesCount = page.getValueCount();
  ValuesReader rlReader = page.getRlEncoding().getValuesReader(desc, REPETITION_LEVEL);
  ValuesReader dlReader = page.getDlEncoding().getValuesReader(desc, DEFINITION_LEVEL);
  this.repetitionLevels = new ValuesReaderIntIterator(rlReader);
  this.definitionLevels = new ValuesReaderIntIterator(dlReader);
  try {
    BytesInput bytes = page.getBytes();
    LOG.debug("page size {} bytes and {} records", bytes.size(), triplesCount);
    LOG.debug("reading repetition levels at 0");
    ByteBufferInputStream in = bytes.toInputStream();
    rlReader.initFromPage(triplesCount, in);
    LOG.debug("reading definition levels at {}", in.position());
    dlReader.initFromPage(triplesCount, in);
    LOG.debug("reading data at {}", in.position());
    initDataReader(page.getValueEncoding(), in, page.getValueCount());
  } catch (IOException e) {
    throw new ParquetDecodingException("could not read page " + page + " in col " + desc, e);
  }
}

Code example source: com.alibaba.blink/flink-table

private void readPageV1(DataPageV1 page) {
  this.pageValueCount = page.getValueCount();
  ValuesReader rlReader = page.getRlEncoding().getValuesReader(descriptor, REPETITION_LEVEL);
  // Initialize the decoders.
  if (page.getDlEncoding() != Encoding.RLE && descriptor.getMaxDefinitionLevel() != 0) {
    throw new UnsupportedOperationException("Unsupported encoding: " + page.getDlEncoding());
  }
  int bitWidth = BytesUtils.getWidthFromMaxInt(descriptor.getMaxDefinitionLevel());
  this.defColumn = new VectorizedDefValuesReader(bitWidth);
  ValuesReader dlReader = this.defColumn;
  try {
    byte[] bytes = page.getBytes().toByteArray();
    LOG.debug("page size " + bytes.length + " bytes and " + pageValueCount + " records");
    LOG.debug("reading repetition levels at 0");
    rlReader.initFromPage(pageValueCount, bytes, 0);
    int next = rlReader.getNextOffset();
    LOG.debug("reading definition levels at " + next);
    dlReader.initFromPage(pageValueCount, bytes, next);
    next = dlReader.getNextOffset();
    LOG.debug("reading data at " + next);
    initDataReader(page.getValueEncoding(), bytes, next);
  } catch (IOException e) {
    throw new ParquetDecodingException("could not read page " + page + " in col " + descriptor, e);
  }
}

Code example source: Netflix/iceberg

this.values = dataEncoding.getValuesReader(desc, VALUES);

Code example source: org.apache.drill.exec/drill-java-exec

repetitionLevels = rlEncoding.getValuesReader(parentColumnReader.columnDescriptor, ValuesType.REPETITION_LEVEL);
repetitionLevels.initFromPage(currentPageCount, in);
repetitionLevels.readInteger();
definitionLevels = dlEncoding.getValuesReader(parentColumnReader.columnDescriptor, ValuesType.DEFINITION_LEVEL);
parentColumnReader.currDefLevel = -1;

Code example source: io.snappydata/snappy-spark-sql

private void readPageV1(DataPageV1 page) throws IOException {
  this.pageValueCount = page.getValueCount();
  ValuesReader rlReader = page.getRlEncoding().getValuesReader(descriptor, REPETITION_LEVEL);
  ValuesReader dlReader;
  // Initialize the decoders.
  if (page.getDlEncoding() != Encoding.RLE && descriptor.getMaxDefinitionLevel() != 0) {
    throw new UnsupportedOperationException("Unsupported encoding: " + page.getDlEncoding());
  }
  int bitWidth = BytesUtils.getWidthFromMaxInt(descriptor.getMaxDefinitionLevel());
  this.defColumn = new VectorizedRleValuesReader(bitWidth);
  dlReader = this.defColumn;
  this.repetitionLevelColumn = new ValuesReaderIntIterator(rlReader);
  this.definitionLevelColumn = new ValuesReaderIntIterator(dlReader);
  try {
    byte[] bytes = page.getBytes().toByteArray();
    rlReader.initFromPage(pageValueCount, bytes, 0);
    int next = rlReader.getNextOffset();
    dlReader.initFromPage(pageValueCount, bytes, next);
    next = dlReader.getNextOffset();
    initDataReader(page.getValueEncoding(), bytes, next);
  } catch (IOException e) {
    throw new IOException("could not read page " + page + " in col " + descriptor, e);
  }
}

Code example source: org.apache.drill.exec/drill-java-exec

repetitionLevels = rlEncoding.getValuesReader(parentColumnReader.columnDescriptor, ValuesType.REPETITION_LEVEL);
repetitionLevels.initFromPage(currentPageCount, in);
definitionLevels = dlEncoding.getValuesReader(parentColumnReader.columnDescriptor, ValuesType.DEFINITION_LEVEL);
definitionLevels.initFromPage(currentPageCount, in);
readPosInBytes = in.position();
if (!valueEncoding.usesDictionary()) {
valueReader = valueEncoding.getValuesReader(parentColumnReader.columnDescriptor, ValuesType.VALUES);
valueReader.initFromPage(currentPageCount, in);

Code example source: dremio/dremio-oss

repetitionLevels = rlEncoding.getValuesReader(parentColumnReader.columnDescriptor, ValuesType.REPETITION_LEVEL);
repetitionLevels.initFromPage(currentPageCount, pageDataBuffer, (int) readPosInBytes);
definitionLevels = dlEncoding.getValuesReader(parentColumnReader.columnDescriptor, ValuesType.DEFINITION_LEVEL);
definitionLevels.initFromPage(currentPageCount, pageDataBuffer, (int) readPosInBytes);
readPosInBytes = definitionLevels.getNextOffset();
if (!valueEncoding.usesDictionary()) {
valueReader = valueEncoding.getValuesReader(parentColumnReader.columnDescriptor, ValuesType.VALUES);
valueReader.initFromPage(currentPageCount, pageDataBuffer, (int) readPosInBytes);

Code example source: org.apache.parquet/parquet-column

private void initDataReader(Encoding dataEncoding, ByteBufferInputStream in, int valueCount) {
  ValuesReader previousReader = this.dataColumn;
  this.currentEncoding = dataEncoding;
  this.pageValueCount = valueCount;
  this.endOfPageValueCount = readValues + pageValueCount;
  if (dataEncoding.usesDictionary()) {
    if (dictionary == null) {
      throw new ParquetDecodingException(
          "could not read page in col " + path + " as the dictionary was missing for encoding " + dataEncoding);
    }
    this.dataColumn = dataEncoding.getDictionaryBasedValuesReader(path, VALUES, dictionary);
  } else {
    this.dataColumn = dataEncoding.getValuesReader(path, VALUES);
  }
  if (dataEncoding.usesDictionary() && converter.hasDictionarySupport()) {
    bindToDictionary(dictionary);
  } else {
    bind(path.getType());
  }
  try {
    dataColumn.initFromPage(pageValueCount, in);
  } catch (IOException e) {
    throw new ParquetDecodingException("could not read page in col " + path, e);
  }
  if (CorruptDeltaByteArrays.requiresSequentialReads(writerVersion, dataEncoding) &&
      previousReader != null && previousReader instanceof RequiresPreviousReader) {
    // previous reader can only be set if reading sequentially
    ((RequiresPreviousReader) dataColumn).setPreviousReader(previousReader);
  }
}

Code example source: org.lasersonlab.apache.parquet/parquet-column

private void initDataReader(Encoding dataEncoding, ByteBufferInputStream in, int valueCount) {
  ValuesReader previousReader = this.dataColumn;
  this.currentEncoding = dataEncoding;
  this.pageValueCount = valueCount;
  this.endOfPageValueCount = readValues + pageValueCount;
  if (dataEncoding.usesDictionary()) {
    if (dictionary == null) {
      throw new ParquetDecodingException(
          "could not read page in col " + path + " as the dictionary was missing for encoding " + dataEncoding);
    }
    this.dataColumn = dataEncoding.getDictionaryBasedValuesReader(path, VALUES, dictionary);
  } else {
    this.dataColumn = dataEncoding.getValuesReader(path, VALUES);
  }
  if (dataEncoding.usesDictionary() && converter.hasDictionarySupport()) {
    bindToDictionary(dictionary);
  } else {
    bind(path.getType());
  }
  try {
    dataColumn.initFromPage(pageValueCount, in);
  } catch (IOException e) {
    throw new ParquetDecodingException("could not read page in col " + path, e);
  }
  if (CorruptDeltaByteArrays.requiresSequentialReads(writerVersion, dataEncoding) &&
      previousReader != null && previousReader instanceof RequiresPreviousReader) {
    // previous reader can only be set if reading sequentially
    ((RequiresPreviousReader) dataColumn).setPreviousReader(previousReader);
  }
}