本文整理了Java中org.apache.parquet.column.Encoding类的一些代码示例,展示了Encoding类的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。Encoding类的具体详情如下:
包路径:org.apache.parquet.column.Encoding
类名称:Encoding
[英]encoding of the data
[中]数据的编码
代码示例来源:origin: apache/hive
/**
 * Creates a column reader and, when the page reader supplies a dictionary page,
 * decodes that dictionary up front.
 *
 * @param descriptor              descriptor of the column to read
 * @param pageReader              source of the dictionary page
 * @param skipTimestampConversion flag stored on the reader and forwarded to the dictionary reader factory
 * @param parquetType             Parquet type of the column
 * @param hiveType                Hive type of the column
 * @throws IOException if the dictionary page cannot be decoded
 */
public BaseVectorizedColumnReader(
    ColumnDescriptor descriptor,
    PageReader pageReader,
    boolean skipTimestampConversion,
    Type parquetType, TypeInfo hiveType) throws IOException {
  this.descriptor = descriptor;
  this.type = parquetType;
  this.pageReader = pageReader;
  this.maxDefLevel = descriptor.getMaxDefinitionLevel();
  this.skipTimestampConversion = skipTimestampConversion;
  this.hiveType = hiveType;

  DictionaryPage dictionaryPage = pageReader.readDictionaryPage();
  if (dictionaryPage == null) {
    // No dictionary page: nothing to decode.
    this.dictionary = null;
    this.isCurrentPageDictionaryEncoded = false;
    return;
  }

  try {
    this.dictionary = ParquetDataColumnReaderFactory.getDataColumnReaderByTypeOnDictionary(
        parquetType.asPrimitiveType(),
        hiveType,
        dictionaryPage.getEncoding().initDictionary(descriptor, dictionaryPage),
        skipTimestampConversion);
    this.isCurrentPageDictionaryEncoded = true;
  } catch (IOException e) {
    // Re-throw with the column named, keeping the original cause.
    throw new IOException("could not decode the dictionary for " + descriptor, e);
  }
}
代码示例来源:origin: apache/hive
/**
 * Selects the data reader matching the page's value encoding and initializes it
 * from the page stream.
 *
 * @param dataEncoding encoding of the page's values
 * @param in           page stream handed to the data reader
 * @param valueCount   number of values in the page
 * @throws IOException if a dictionary-based encoding is used without a dictionary,
 *                     or if the data reader fails to initialize from the page
 */
private void initDataReader(Encoding dataEncoding, ByteBufferInputStream in, int valueCount)
    throws IOException {
  this.pageValueCount = valueCount;
  this.endOfPageValueCount = valuesRead + pageValueCount;

  if (!dataEncoding.usesDictionary()) {
    dataColumn = ParquetDataColumnReaderFactory.getDataColumnReaderByType(
        type.asPrimitiveType(),
        hiveType,
        dataEncoding.getValuesReader(descriptor, VALUES),
        skipTimestampConversion);
    this.isCurrentPageDictionaryEncoded = false;
  } else {
    // Clear the previous reader before validating that a dictionary is available.
    this.dataColumn = null;
    if (dictionary == null) {
      throw new IOException(
          "could not read page in col " + descriptor +
              " as the dictionary was missing for encoding " + dataEncoding);
    }
    dataColumn = ParquetDataColumnReaderFactory.getDataColumnReaderByType(
        type.asPrimitiveType(),
        hiveType,
        dataEncoding.getDictionaryBasedValuesReader(descriptor, VALUES, dictionary.getDictionary()),
        skipTimestampConversion);
    this.isCurrentPageDictionaryEncoded = true;
  }

  try {
    dataColumn.initFromPage(pageValueCount, in);
  } catch (IOException e) {
    throw new IOException("could not read page in col " + descriptor, e);
  }
}
代码示例来源:origin: apache/hive
/**
 * Sets up the repetition-level, definition-level and data readers for a V1 data page.
 *
 * <p>The page bytes are consumed in order: repetition levels, definition levels, then values.
 *
 * @param page the V1 data page to read
 * @throws ParquetDecodingException if any of the readers fails to initialize from the page
 */
private void readPageV1(DataPageV1 page) {
  // Use this page's own value count for the level readers. initDataReader assigns the
  // pageValueCount field from the same count, but only after the level readers have been
  // initialized, so reading the field here could observe a count left over from earlier.
  final int valueCount = page.getValueCount();
  ValuesReader rlReader = page.getRlEncoding().getValuesReader(descriptor, REPETITION_LEVEL);
  ValuesReader dlReader = page.getDlEncoding().getValuesReader(descriptor, DEFINITION_LEVEL);
  this.repetitionLevelColumn = new ValuesReaderIntIterator(rlReader);
  this.definitionLevelColumn = new ValuesReaderIntIterator(dlReader);
  try {
    BytesInput bytes = page.getBytes();
    LOG.debug("page size " + bytes.size() + " bytes and " + valueCount + " records");
    ByteBufferInputStream in = bytes.toInputStream();
    LOG.debug("reading repetition levels at " + in.position());
    rlReader.initFromPage(valueCount, in);
    LOG.debug("reading definition levels at " + in.position());
    dlReader.initFromPage(valueCount, in);
    LOG.debug("reading data at " + in.position());
    initDataReader(page.getValueEncoding(), in, valueCount);
  } catch (IOException e) {
    throw new ParquetDecodingException("could not read page " + page + " in col " + descriptor, e);
  }
}
代码示例来源:origin: org.apache.drill.exec/drill-java-exec
/**
 * Reads the dictionary page described by {@code pageHeader}, records its buffer in
 * {@code allocatedDictionaryBuffers}, and initializes {@code this.dictionary} from it.
 *
 * @param pageHeader   header of the dictionary page
 * @param parentStatus column reader whose column descriptor is used to build the dictionary
 * @throws IOException if reading the page or initializing the dictionary fails
 */
private void readDictionaryPage(final PageHeader pageHeader,
    final ColumnReader<?> parentStatus) throws IOException {
  final int compressedBytes = pageHeader.getCompressed_page_size();
  final int uncompressedBytes = pageHeader.getUncompressed_page_size();

  final DrillBuf buffer = readPage(pageHeader, compressedBytes, uncompressedBytes);
  // Track the buffer alongside the other dictionary buffers.
  allocatedDictionaryBuffers.add(buffer);

  final DictionaryPage dictionaryPage = new DictionaryPage(
      asBytesInput(buffer, 0, uncompressedBytes),
      pageHeader.uncompressed_page_size,
      pageHeader.dictionary_page_header.num_values,
      valueOf(pageHeader.dictionary_page_header.encoding.name()));

  this.dictionary = dictionaryPage.getEncoding()
      .initDictionary(parentStatus.columnDescriptor, dictionaryPage);
}
代码示例来源:origin: org.apache.spark/spark-sql
/**
 * Picks the vectorized value reader for the page's encoding and initializes it from
 * the page stream.
 *
 * <p>Only {@code PLAIN}, {@code PLAIN_DICTIONARY} and {@code RLE_DICTIONARY} are accepted.
 *
 * @param dataEncoding encoding of the page's values
 * @param in           page stream handed to the value reader
 * @throws IOException                   if a dictionary encoding is used without a dictionary,
 *                                       or the reader fails to initialize
 * @throws UnsupportedOperationException if the encoding is not one of the accepted ones
 */
private void initDataReader(Encoding dataEncoding, ByteBufferInputStream in) throws IOException {
  this.endOfPageValueCount = valuesRead + pageValueCount;

  final boolean dictionaryEncoded = dataEncoding.usesDictionary();
  if (!dictionaryEncoded) {
    if (dataEncoding != Encoding.PLAIN) {
      throw new UnsupportedOperationException("Unsupported encoding: " + dataEncoding);
    }
    this.dataColumn = new VectorizedPlainValuesReader();
  } else {
    // Drop the previous reader before any validation can fail.
    this.dataColumn = null;
    if (dictionary == null) {
      throw new IOException(
          "could not read page in col " + descriptor +
              " as the dictionary was missing for encoding " + dataEncoding);
    }
    @SuppressWarnings("deprecation")
    Encoding plainDict = Encoding.PLAIN_DICTIONARY; // local so the suppression has a target
    final boolean supported = dataEncoding == plainDict || dataEncoding == Encoding.RLE_DICTIONARY;
    if (!supported) {
      throw new UnsupportedOperationException("Unsupported encoding: " + dataEncoding);
    }
    this.dataColumn = new VectorizedRleValuesReader();
  }
  this.isCurrentPageDictionaryEncoded = dictionaryEncoded;

  try {
    dataColumn.initFromPage(pageValueCount, in);
  } catch (IOException e) {
    throw new IOException("could not read page in col " + descriptor, e);
  }
}
代码示例来源:origin: org.apache.drill.exec/drill-java-exec
repetitionLevels = rlEncoding.getValuesReader(parentColumnReader.columnDescriptor, ValuesType.REPETITION_LEVEL);
repetitionLevels.initFromPage(currentPageCount, in);
definitionLevels = dlEncoding.getValuesReader(parentColumnReader.columnDescriptor, ValuesType.DEFINITION_LEVEL);
definitionLevels.initFromPage(currentPageCount, in);
readPosInBytes = in.position();
if (!valueEncoding.usesDictionary()) {
valueReader = valueEncoding.getValuesReader(parentColumnReader.columnDescriptor, ValuesType.VALUES);
valueReader.initFromPage(currentPageCount, in);
valueReader = valueEncoding.getValuesReader(parentColumnReader.columnDescriptor, ValuesType.VALUES);
valueReader.initFromPage(currentPageCount, in);
if (valueEncoding.usesDictionary()) {
代码示例来源:origin: io.prestosql/presto-parquet
/**
 * Maps each entry of {@code encodings} to the {@code org.apache.parquet.column.Encoding}
 * constant with the same name.
 *
 * @param encodings encodings to convert
 * @return an unmodifiable set of the converted encodings
 */
private static Set<org.apache.parquet.column.Encoding> readEncodings(List<Encoding> encodings)
{
    Set<org.apache.parquet.column.Encoding> converted = new HashSet<>();
    // Look each one up by name in the column-level enum.
    encodings.forEach(source -> converted.add(org.apache.parquet.column.Encoding.valueOf(source.name())));
    return Collections.unmodifiableSet(converted);
}
代码示例来源:origin: org.apache.parquet/parquet-column
/**
 * Returns a reader for values that can be decoded without a dictionary.
 *
 * <p>This implementation always throws, reporting the encoding as dictionary based.
 *
 * @param descriptor the column to read
 * @param valuesType the type of values
 * @return the proper values reader for the given column
 * @throws UnsupportedOperationException if the encoding is dictionary based
 */
public ValuesReader getValuesReader(ColumnDescriptor descriptor, ValuesType valuesType) {
  final String message = "Error decoding " + descriptor + ". " + name() + " is dictionary based";
  throw new UnsupportedOperationException(message);
}
代码示例来源:origin: org.lasersonlab.apache.parquet/parquet-column
/**
 * Delegates to the {@code RLE_DICTIONARY} implementation of this method.
 */
@Override
public ValuesReader getDictionaryBasedValuesReader(ColumnDescriptor descriptor, ValuesType valuesType, Dictionary dictionary) {
  final ValuesReader delegateReader =
      RLE_DICTIONARY.getDictionaryBasedValuesReader(descriptor, valuesType, dictionary);
  return delegateReader;
}
代码示例来源:origin: org.apache.spark/spark-sql_2.11
/**
 * Prepares the repetition-level, definition-level and data readers for a V1 data page.
 *
 * @param page the V1 data page to read
 * @throws IOException                   if any reader fails to initialize from the page
 * @throws UnsupportedOperationException if definition levels are not RLE encoded while the
 *                                       column's maximum definition level is non-zero
 */
private void readPageV1(DataPageV1 page) throws IOException {
  this.pageValueCount = page.getValueCount();
  ValuesReader rlReader = page.getRlEncoding().getValuesReader(descriptor, REPETITION_LEVEL);

  // Initialize the decoders.
  final boolean dlEncodingAccepted =
      page.getDlEncoding() == Encoding.RLE || descriptor.getMaxDefinitionLevel() == 0;
  if (!dlEncodingAccepted) {
    throw new UnsupportedOperationException("Unsupported encoding: " + page.getDlEncoding());
  }
  this.defColumn = new VectorizedRleValuesReader(
      BytesUtils.getWidthFromMaxInt(descriptor.getMaxDefinitionLevel()));
  ValuesReader dlReader = this.defColumn;

  this.repetitionLevelColumn = new ValuesReaderIntIterator(rlReader);
  this.definitionLevelColumn = new ValuesReaderIntIterator(dlReader);

  try {
    ByteBufferInputStream in = page.getBytes().toInputStream();
    // Consume the stream in order: repetition levels, definition levels, values.
    rlReader.initFromPage(pageValueCount, in);
    dlReader.initFromPage(pageValueCount, in);
    initDataReader(page.getValueEncoding(), in);
  } catch (IOException e) {
    throw new IOException("could not read page " + page + " in col " + descriptor, e);
  }
}
代码示例来源:origin: dremio/dremio-oss
/**
 * Allocates a dictionary buffer, reads the dictionary page described by {@code pageHeader}
 * into it, and initializes {@code this.dictionary} from the result.
 *
 * @param pageHeader   header of the dictionary page
 * @param parentStatus column reader whose column descriptor is used to build the dictionary
 * @throws IOException if reading the page or initializing the dictionary fails
 */
private void readDictionaryPage(final PageHeader pageHeader,
    final ColumnReader<?> parentStatus) throws IOException {
  final int compressedBytes = pageHeader.getCompressed_page_size();
  final int uncompressedBytes = pageHeader.getUncompressed_page_size();

  // The buffer is sized for the uncompressed page and filled by readPage.
  final ArrowBuf buffer = allocateDictionaryBuffer(uncompressedBytes);
  readPage(pageHeader, compressedBytes, uncompressedBytes, buffer);

  final DictionaryPage dictionaryPage = new DictionaryPage(
      asBytesInput(buffer, 0, uncompressedBytes),
      pageHeader.uncompressed_page_size,
      pageHeader.dictionary_page_header.num_values,
      valueOf(pageHeader.dictionary_page_header.encoding.name()));

  this.dictionary = dictionaryPage.getEncoding()
      .initDictionary(parentStatus.columnDescriptor, dictionaryPage);
}
代码示例来源:origin: org.apache.spark/spark-sql_2.11
/**
 * Chooses the vectorized value reader that matches the page's encoding and initializes
 * it from the page stream.
 *
 * <p>Accepted encodings are {@code PLAIN}, {@code PLAIN_DICTIONARY} and {@code RLE_DICTIONARY}.
 *
 * @param dataEncoding encoding of the page's values
 * @param in           page stream handed to the value reader
 * @throws IOException                   if a dictionary encoding is used without a dictionary,
 *                                       or the reader fails to initialize
 * @throws UnsupportedOperationException if the encoding is not one of the accepted ones
 */
private void initDataReader(Encoding dataEncoding, ByteBufferInputStream in) throws IOException {
  this.endOfPageValueCount = valuesRead + pageValueCount;

  final boolean usesDictionary = dataEncoding.usesDictionary();
  if (usesDictionary) {
    // Reset first so a failed validation does not leave the previous reader in place.
    this.dataColumn = null;
    if (dictionary == null) {
      throw new IOException(
          "could not read page in col " + descriptor +
              " as the dictionary was missing for encoding " + dataEncoding);
    }
    @SuppressWarnings("deprecation")
    Encoding plainDict = Encoding.PLAIN_DICTIONARY; // local so the suppression has a target
    final boolean isDictionaryEncoding =
        dataEncoding == plainDict || dataEncoding == Encoding.RLE_DICTIONARY;
    if (!isDictionaryEncoding) {
      throw new UnsupportedOperationException("Unsupported encoding: " + dataEncoding);
    }
    this.dataColumn = new VectorizedRleValuesReader();
  } else {
    if (dataEncoding != Encoding.PLAIN) {
      throw new UnsupportedOperationException("Unsupported encoding: " + dataEncoding);
    }
    this.dataColumn = new VectorizedPlainValuesReader();
  }
  this.isCurrentPageDictionaryEncoded = usesDictionary;

  try {
    dataColumn.initFromPage(pageValueCount, in);
  } catch (IOException e) {
    throw new IOException("could not read page in col " + descriptor, e);
  }
}
代码示例来源:origin: dremio/dremio-oss
repetitionLevels = rlEncoding.getValuesReader(parentColumnReader.columnDescriptor, ValuesType.REPETITION_LEVEL);
repetitionLevels.initFromPage(currentPageCount, pageDataBuffer, (int) readPosInBytes);
definitionLevels = dlEncoding.getValuesReader(parentColumnReader.columnDescriptor, ValuesType.DEFINITION_LEVEL);
definitionLevels.initFromPage(currentPageCount, pageDataBuffer, (int) readPosInBytes);
readPosInBytes = definitionLevels.getNextOffset();
if (!valueEncoding.usesDictionary()) {
valueReader = valueEncoding.getValuesReader(parentColumnReader.columnDescriptor, ValuesType.VALUES);
valueReader.initFromPage(currentPageCount, pageDataBuffer, (int) readPosInBytes);
valueReader = valueEncoding.getValuesReader(parentColumnReader.columnDescriptor, ValuesType.VALUES);
valueReader.initFromPage(currentPageCount, pageDataBuffer, (int) readPosInBytes);
if (valueEncoding.usesDictionary()) {
代码示例来源:origin: prestosql/presto
/**
 * Converts every element of {@code encodings} into the
 * {@code org.apache.parquet.column.Encoding} constant that shares its name.
 *
 * @param encodings encodings to convert
 * @return an unmodifiable set holding the converted encodings
 */
private static Set<org.apache.parquet.column.Encoding> readEncodings(List<Encoding> encodings)
{
    Set<org.apache.parquet.column.Encoding> result = new HashSet<>();
    encodings.forEach(item -> {
        // Resolve by name in the column-level enum.
        org.apache.parquet.column.Encoding columnEncoding =
                org.apache.parquet.column.Encoding.valueOf(item.name());
        result.add(columnEncoding);
    });
    return Collections.unmodifiableSet(result);
}
代码示例来源:origin: org.apache.parquet/parquet-column
/**
 * Returns a reader for values that need a dictionary to be decoded.
 *
 * <p>This implementation always throws, reporting the encoding as not dictionary based.
 *
 * @param descriptor the column to read
 * @param valuesType the type of values
 * @param dictionary the dictionary
 * @return the proper values reader for the given column
 * @throws UnsupportedOperationException if the encoding is not dictionary based
 */
public ValuesReader getDictionaryBasedValuesReader(ColumnDescriptor descriptor, ValuesType valuesType, Dictionary dictionary) {
  final String message = name() + " is not dictionary based";
  throw new UnsupportedOperationException(message);
}
代码示例来源:origin: org.apache.parquet/parquet-column
/**
 * Forwards the request to the {@code RLE_DICTIONARY} implementation of this method.
 */
@Override
public ValuesReader getDictionaryBasedValuesReader(ColumnDescriptor descriptor, ValuesType valuesType, Dictionary dictionary) {
  final ValuesReader reader =
      RLE_DICTIONARY.getDictionaryBasedValuesReader(descriptor, valuesType, dictionary);
  return reader;
}
代码示例来源:origin: Netflix/iceberg
if (dataEncoding.usesDictionary()) {
if (dict == null) {
throw new ParquetDecodingException(
"could not read page in col " + desc + " as the dictionary was missing for encoding " + dataEncoding);
this.values = dataEncoding.getDictionaryBasedValuesReader(desc, VALUES, dict);
} else {
this.values = dataEncoding.getValuesReader(desc, VALUES);
代码示例来源:origin: org.apache.spark/spark-sql_2.10
/**
 * Creates a column reader, decoding the dictionary page when the page reader supplies one,
 * and records the total value count reported by the page reader.
 *
 * @param descriptor descriptor of the column to read
 * @param pageReader source of the dictionary page and the total value count
 * @throws IOException if the dictionary cannot be decoded or the total value count is zero
 */
public VectorizedColumnReader(ColumnDescriptor descriptor, PageReader pageReader)
    throws IOException {
  this.descriptor = descriptor;
  this.pageReader = pageReader;
  this.maxDefLevel = descriptor.getMaxDefinitionLevel();

  DictionaryPage dictionaryPage = pageReader.readDictionaryPage();
  if (dictionaryPage == null) {
    // No dictionary page: nothing to decode.
    this.dictionary = null;
    this.isCurrentPageDictionaryEncoded = false;
  } else {
    try {
      this.dictionary = dictionaryPage.getEncoding().initDictionary(descriptor, dictionaryPage);
      this.isCurrentPageDictionaryEncoded = true;
    } catch (IOException e) {
      // Re-throw with the column named, keeping the original cause.
      throw new IOException("could not decode the dictionary for " + descriptor, e);
    }
  }

  this.totalValueCount = pageReader.getTotalValueCount();
  if (totalValueCount == 0) {
    throw new IOException("totalValueCount == 0");
  }
}
代码示例来源:origin: org.apache.spark/spark-sql
/**
 * Sets up the repetition-level, definition-level and data readers for a V1 data page.
 *
 * @param page the V1 data page to read
 * @throws IOException                   if any reader fails to initialize from the page
 * @throws UnsupportedOperationException if definition levels are not RLE encoded while the
 *                                       column's maximum definition level is non-zero
 */
private void readPageV1(DataPageV1 page) throws IOException {
  this.pageValueCount = page.getValueCount();
  ValuesReader rlReader = page.getRlEncoding().getValuesReader(descriptor, REPETITION_LEVEL);

  // Initialize the decoders.
  final boolean rleOrNoDefinitionLevels =
      page.getDlEncoding() == Encoding.RLE || descriptor.getMaxDefinitionLevel() == 0;
  if (!rleOrNoDefinitionLevels) {
    throw new UnsupportedOperationException("Unsupported encoding: " + page.getDlEncoding());
  }
  final int levelBitWidth = BytesUtils.getWidthFromMaxInt(descriptor.getMaxDefinitionLevel());
  this.defColumn = new VectorizedRleValuesReader(levelBitWidth);
  ValuesReader dlReader = this.defColumn;

  this.repetitionLevelColumn = new ValuesReaderIntIterator(rlReader);
  this.definitionLevelColumn = new ValuesReaderIntIterator(dlReader);

  try {
    BytesInput pageBytes = page.getBytes();
    ByteBufferInputStream in = pageBytes.toInputStream();
    // Consume the stream in order: repetition levels, definition levels, values.
    rlReader.initFromPage(pageValueCount, in);
    dlReader.initFromPage(pageValueCount, in);
    initDataReader(page.getValueEncoding(), in);
  } catch (IOException e) {
    throw new IOException("could not read page " + page + " in col " + descriptor, e);
  }
}
代码示例来源:origin: org.apache.drill.exec/drill-java-exec
/**
 * Builds the dictionary from the decompressed dictionary page held by {@code readStatus}
 * and adds the elapsed decode time to {@code stats.timeDictPageDecode}.
 *
 * <p>Any exception raised while doing so is passed to {@code handleAndThrowException}.
 *
 * @param readStatus   read status providing the page header and page data
 * @param parentStatus column reader whose column descriptor is used to build the dictionary
 * @throws UserException declared by this method; see {@code handleAndThrowException}
 */
private void readDictionaryPageData(final ReadStatus readStatus, final ColumnReader<?> parentStatus)
    throws UserException {
  try {
    pageHeader = readStatus.getPageHeader();
    final int uncompressedBytes = pageHeader.getUncompressed_page_size();
    final DrillBuf buffer = getDecompressedPageData(readStatus);

    // The timer starts after the page data has been obtained.
    final Stopwatch decodeTimer = Stopwatch.createStarted();
    allocatedDictionaryBuffers.add(buffer);

    final DictionaryPage dictionaryPage = new DictionaryPage(
        asBytesInput(buffer, 0, uncompressedBytes),
        pageHeader.uncompressed_page_size,
        pageHeader.dictionary_page_header.num_values,
        valueOf(pageHeader.dictionary_page_header.encoding.name()));
    this.dictionary = dictionaryPage.getEncoding()
        .initDictionary(parentStatus.columnDescriptor, dictionaryPage);

    final long decodeNanos = decodeTimer.elapsed(TimeUnit.NANOSECONDS);
    stats.timeDictPageDecode.addAndGet(decodeNanos);
  } catch (Exception e) {
    handleAndThrowException(e, "Error decoding dictionary page.");
  }
}
内容来源于网络,如有侵权,请联系作者删除!