
x33g5p2x  于2022-01-19 转载在 其他  



[英]To read decoded values that require a dictionary


代码示例来源:origin: apache/hive

private void initDataReader(Encoding dataEncoding, ByteBufferInputStream in, int valueCount)
  throws IOException {
 this.pageValueCount = valueCount;
 this.endOfPageValueCount = valuesRead + pageValueCount;
 if (dataEncoding.usesDictionary()) {
  this.dataColumn = null;
  if (dictionary == null) {
   throw new IOException(
     "could not read page in col " + descriptor +
       " as the dictionary was missing for encoding " + dataEncoding);
  dataColumn = ParquetDataColumnReaderFactory.getDataColumnReaderByType(type.asPrimitiveType(), hiveType,
    dataEncoding.getDictionaryBasedValuesReader(descriptor, VALUES, dictionary
      .getDictionary()), skipTimestampConversion);
  this.isCurrentPageDictionaryEncoded = true;
 } else {
  dataColumn = ParquetDataColumnReaderFactory.getDataColumnReaderByType(type.asPrimitiveType(), hiveType,
    dataEncoding.getValuesReader(descriptor, VALUES), skipTimestampConversion);
  this.isCurrentPageDictionaryEncoded = false;
 try {
  dataColumn.initFromPage(pageValueCount, in);
 } catch (IOException e) {
  throw new IOException("could not read page in col " + descriptor, e);

代码示例来源:origin: org.lasersonlab.apache.parquet/parquet-column

public ValuesReader getDictionaryBasedValuesReader(ColumnDescriptor descriptor, ValuesType valuesType, Dictionary dictionary) {
 return RLE_DICTIONARY.getDictionaryBasedValuesReader(descriptor, valuesType, dictionary);

代码示例来源:origin: org.apache.parquet/parquet-column

public ValuesReader getDictionaryBasedValuesReader(ColumnDescriptor descriptor, ValuesType valuesType, Dictionary dictionary) {
 return RLE_DICTIONARY.getDictionaryBasedValuesReader(descriptor, valuesType, dictionary);

代码示例来源:origin: Netflix/iceberg

"could not read page in col " + desc + " as the dictionary was missing for encoding " + dataEncoding);
 this.values = dataEncoding.getDictionaryBasedValuesReader(desc, VALUES, dict);
} else {
 this.values = dataEncoding.getValuesReader(desc, VALUES);

代码示例来源:origin: org.apache.parquet/parquet-column

/**
 * Prepares {@code dataColumn} for reading the values of a newly loaded page.
 *
 * <p>Records the encoding and value count of the page, selects a dictionary-based or plain
 * values reader depending on {@code dataEncoding}, initializes it from the page stream, and
 * may hand the reader of the preceding page to the new one.
 *
 * <p>NOTE(review): this excerpt has no closing braces and some statements appear to be
 * missing; treat it as an outline and confirm details against the upstream source.
 *
 * @param dataEncoding encoding used by the page's values
 * @param in stream handed to the reader's {@code initFromPage}
 * @param valueCount number of values in the page
 * @throws ParquetDecodingException if the encoding needs a dictionary but none is set, or if
 *     the reader fails to initialize from the page
 */
private void initDataReader(Encoding dataEncoding, ByteBufferInputStream in, int valueCount) {
 // Keep a handle on the reader of the preceding page before dataColumn is reassigned below.
 ValuesReader previousReader = this.dataColumn;
 this.currentEncoding = dataEncoding;
 this.pageValueCount = valueCount;
 this.endOfPageValueCount = readValues + pageValueCount;
 if (dataEncoding.usesDictionary()) {
  // A dictionary-encoded page cannot be decoded without its dictionary.
  if (dictionary == null) {
   throw new ParquetDecodingException(
     "could not read page in col " + path + " as the dictionary was missing for encoding " + dataEncoding);
  this.dataColumn = dataEncoding.getDictionaryBasedValuesReader(path, VALUES, dictionary);
 } else {
  this.dataColumn = dataEncoding.getValuesReader(path, VALUES);
 // NOTE(review): both branches of this if/else are empty in this excerpt; their bodies
 // appear to have been dropped -- confirm against the upstream source.
 if (dataEncoding.usesDictionary() && converter.hasDictionarySupport()) {
 } else {
 try {
  dataColumn.initFromPage(pageValueCount, in);
 } catch (IOException e) {
  // Re-throw with the column path attached, keeping the original failure as the cause.
  throw new ParquetDecodingException("could not read page in col " + path, e);
 // NOTE(review): the instanceof test is made on previousReader, but the cast below is applied
 // to dataColumn; this presumably relies on the new reader implementing
 // RequiresPreviousReader as well -- confirm. The explicit null check is redundant, since
 // instanceof is already false for null.
 if (CorruptDeltaByteArrays.requiresSequentialReads(writerVersion, dataEncoding) &&
   previousReader != null && previousReader instanceof RequiresPreviousReader) {
  // previous reader can only be set if reading sequentially
  ((RequiresPreviousReader) dataColumn).setPreviousReader(previousReader);

代码示例来源:origin: org.lasersonlab.apache.parquet/parquet-column

/**
 * Prepares {@code dataColumn} for reading the values of a newly loaded page.
 *
 * <p>Records the encoding and value count of the page, selects a dictionary-based or plain
 * values reader depending on {@code dataEncoding}, initializes it from the page stream, and
 * may hand the reader of the preceding page to the new one.
 *
 * <p>NOTE(review): this excerpt has no closing braces and some statements appear to be
 * missing; treat it as an outline and confirm details against the upstream source.
 *
 * @param dataEncoding encoding used by the page's values
 * @param in stream handed to the reader's {@code initFromPage}
 * @param valueCount number of values in the page
 * @throws ParquetDecodingException if the encoding needs a dictionary but none is set, or if
 *     the reader fails to initialize from the page
 */
private void initDataReader(Encoding dataEncoding, ByteBufferInputStream in, int valueCount) {
 // Keep a handle on the reader of the preceding page before dataColumn is reassigned below.
 ValuesReader previousReader = this.dataColumn;
 this.currentEncoding = dataEncoding;
 this.pageValueCount = valueCount;
 this.endOfPageValueCount = readValues + pageValueCount;
 if (dataEncoding.usesDictionary()) {
  // A dictionary-encoded page cannot be decoded without its dictionary.
  if (dictionary == null) {
   throw new ParquetDecodingException(
     "could not read page in col " + path + " as the dictionary was missing for encoding " + dataEncoding);
  this.dataColumn = dataEncoding.getDictionaryBasedValuesReader(path, VALUES, dictionary);
 } else {
  this.dataColumn = dataEncoding.getValuesReader(path, VALUES);
 // NOTE(review): both branches of this if/else are empty in this excerpt; their bodies
 // appear to have been dropped -- confirm against the upstream source.
 if (dataEncoding.usesDictionary() && converter.hasDictionarySupport()) {
 } else {
 try {
  dataColumn.initFromPage(pageValueCount, in);
 } catch (IOException e) {
  // Re-throw with the column path attached, keeping the original failure as the cause.
  throw new ParquetDecodingException("could not read page in col " + path, e);
 // NOTE(review): the instanceof test is made on previousReader, but the cast below is applied
 // to dataColumn; this presumably relies on the new reader implementing
 // RequiresPreviousReader as well -- confirm. The explicit null check is redundant, since
 // instanceof is already false for null.
 if (CorruptDeltaByteArrays.requiresSequentialReads(writerVersion, dataEncoding) &&
   previousReader != null && previousReader instanceof RequiresPreviousReader) {
  // previous reader can only be set if reading sequentially
  ((RequiresPreviousReader) dataColumn).setPreviousReader(previousReader);
