org.apache.parquet.column.Dictionary类的使用及代码示例

x33g5p2x  于2022-01-18 转载在 其他  
字(9.6k)|赞(0)|评价(0)|浏览(360)

本文整理了Java中org.apache.parquet.column.Dictionary类的一些代码示例,展示了Dictionary类的具体用法。这些代码示例主要来源于Github/Stackoverflow/Maven等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。Dictionary类的具体详情如下:
包路径:org.apache.parquet.column.Dictionary
类名称:Dictionary

Dictionary介绍

[英]a dictionary to decode dictionary based encodings
[中]用于解码基于字典的编码的字典

代码示例

代码示例来源:origin: apache/hive

@Override
public byte[] readChar(int id) {
 // Char-typed values live in the dictionary as binary; hand back the raw bytes.
 final byte[] bytes = dict.decodeToBinary(id).getBytesUnsafe();
 return bytes;
}

代码示例来源:origin: apache/hive

@Override
public double readDouble(int id) {
 // Direct dictionary lookup: the entry is already stored as a double.
 final double value = dict.decodeToDouble(id);
 return value;
}

代码示例来源:origin: apache/hive

@Override
public double readDouble(int id) {
 // The dictionary entry is a float; the return widens it to double losslessly.
 final float value = dict.decodeToFloat(id);
 return value;
}

代码示例来源:origin: apache/hive

@Override
public void setDictionary(Dictionary dictionary) {
 // Eagerly convert every dictionary entry once, so later reads are plain
 // list lookups. Ids run from 0 through getMaxId() inclusive.
 lookupTable = new ArrayList<T>();
 for (int id = 0; id <= dictionary.getMaxId(); id++) {
  lookupTable.add(convert(dictionary.decodeToBinary(id)));
 }
}

代码示例来源:origin: org.apache.spark/spark-sql_2.10

for (int i = rowId; i < rowId + num; ++i) {
  if (!column.isNullAt(i)) {
   column.putInt(i, dictionary.decodeToInt(dictionaryIds.getDictId(i)));
 for (int i = rowId; i < rowId + num; ++i) {
  if (!column.isNullAt(i)) {
   column.putByte(i, (byte) dictionary.decodeToInt(dictionaryIds.getDictId(i)));
 for (int i = rowId; i < rowId + num; ++i) {
  if (!column.isNullAt(i)) {
   column.putShort(i, (short) dictionary.decodeToInt(dictionaryIds.getDictId(i)));
 for (int i = rowId; i < rowId + num; ++i) {
  if (!column.isNullAt(i)) {
   column.putLong(i, dictionary.decodeToLong(dictionaryIds.getDictId(i)));
  if (!column.isNullAt(i)) {
   column.putLong(i,
    DateTimeUtils.fromMillis(dictionary.decodeToLong(dictionaryIds.getDictId(i))));
for (int i = rowId; i < rowId + num; ++i) {
 if (!column.isNullAt(i)) {
  column.putFloat(i, dictionary.decodeToFloat(dictionaryIds.getDictId(i)));
for (int i = rowId; i < rowId + num; ++i) {
 if (!column.isNullAt(i)) {
  column.putDouble(i, dictionary.decodeToDouble(dictionaryIds.getDictId(i)));
   Binary v = dictionary.decodeToBinary(dictionaryIds.getDictId(i));

代码示例来源:origin: apache/hive

@Override
public long readLong(int id) {
 // Direct dictionary lookup: the entry is already stored as a long.
 final long value = dict.decodeToLong(id);
 return value;
}

代码示例来源:origin: apache/hive

@Override
public double readDouble(int id) {
 // The dictionary entry is an int; the return widens it to double losslessly.
 final int value = dict.decodeToInt(id);
 return value;
}

代码示例来源:origin: dremio/dremio-oss

/**
 * Merges the distinct double values of all local dictionaries (plus the
 * existing global dictionary, when present) into one sorted global
 * dictionary vector for the given column.
 */
private static VectorContainer buildDoubleGlobalDictionary(List<Dictionary> dictionaries, VectorContainer existingDict, ColumnDescriptor columnDescriptor, BufferAllocator bufferAllocator) {
 final Field field = new Field(SchemaPath.getCompoundPath(columnDescriptor.getPath()).getAsUnescapedPath(), true, new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE), null);
 final VectorContainer output = new VectorContainer(bufferAllocator);
 final Float8Vector vector = output.addOrGet(field);
 vector.allocateNew();
 // TreeSet both de-duplicates and keeps the values in ascending order.
 final SortedSet<Double> merged = Sets.newTreeSet();
 for (Dictionary localDictionary : dictionaries) {
  final int maxId = localDictionary.getMaxId();
  for (int id = 0; id <= maxId; id++) {
   merged.add(localDictionary.decodeToDouble(id));
  }
 }
 if (existingDict != null) {
  final Float8Vector previousValues = existingDict.getValueAccessorById(Float8Vector.class, 0).getValueVector();
  for (int row = 0; row < existingDict.getRecordCount(); row++) {
   merged.add(previousValues.get(row));
  }
 }
 int count = 0;
 for (double value : merged) {
  vector.setSafe(count++, value);
 }
 vector.setValueCount(count);
 output.setRecordCount(count);
 output.buildSchema(BatchSchema.SelectionVectorMode.NONE);
 return output;
}

代码示例来源:origin: dremio/dremio-oss

/**
 * Merges the distinct long values of all local dictionaries (plus the
 * existing global dictionary, when present) into one sorted global
 * dictionary vector for the given column.
 */
private static VectorContainer buildLongGlobalDictionary(List<Dictionary> dictionaries, VectorContainer existingDict, ColumnDescriptor columnDescriptor, BufferAllocator bufferAllocator) {
 final Field field = new Field(SchemaPath.getCompoundPath(columnDescriptor.getPath()).getAsUnescapedPath(), true, new ArrowType.Int(64, true), null);
 final VectorContainer output = new VectorContainer(bufferAllocator);
 final BigIntVector vector = output.addOrGet(field);
 vector.allocateNew();
 // TreeSet both de-duplicates and keeps the values in ascending order.
 final SortedSet<Long> merged = Sets.newTreeSet();
 for (Dictionary localDictionary : dictionaries) {
  final int maxId = localDictionary.getMaxId();
  for (int id = 0; id <= maxId; id++) {
   merged.add(localDictionary.decodeToLong(id));
  }
 }
 if (existingDict != null) {
  final BigIntVector previousValues = existingDict.getValueAccessorById(BigIntVector.class, 0).getValueVector();
  for (int row = 0; row < existingDict.getRecordCount(); row++) {
   merged.add(previousValues.get(row));
  }
 }
 int count = 0;
 for (long value : merged) {
  vector.setSafe(count++, value);
 }
 vector.setValueCount(count);
 output.setRecordCount(count);
 output.buildSchema(BatchSchema.SelectionVectorMode.NONE);
 return output;
}

代码示例来源:origin: dremio/dremio-oss

/**
 * Merges the distinct float values of all local dictionaries (plus the
 * existing global dictionary, when present) into one sorted global
 * dictionary vector for the given column.
 */
private static VectorContainer buildFloatGlobalDictionary(List<Dictionary> dictionaries, VectorContainer existingDict, ColumnDescriptor columnDescriptor, BufferAllocator bufferAllocator) {
 final Field field = new Field(SchemaPath.getCompoundPath(columnDescriptor.getPath()).getAsUnescapedPath(), true, new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE), null);
 final VectorContainer output = new VectorContainer(bufferAllocator);
 final Float4Vector vector = output.addOrGet(field);
 vector.allocateNew();
 // TreeSet both de-duplicates and keeps the values in ascending order.
 final SortedSet<Float> merged = Sets.newTreeSet();
 for (Dictionary localDictionary : dictionaries) {
  final int maxId = localDictionary.getMaxId();
  for (int id = 0; id <= maxId; id++) {
   merged.add(localDictionary.decodeToFloat(id));
  }
 }
 if (existingDict != null) {
  final Float4Vector previousValues = existingDict.getValueAccessorById(Float4Vector.class, 0).getValueVector();
  for (int row = 0; row < existingDict.getRecordCount(); row++) {
   merged.add(previousValues.get(row));
  }
 }
 int count = 0;
 for (float value : merged) {
  vector.setSafe(count++, value);
 }
 vector.setValueCount(count);
 output.setRecordCount(count);
 output.buildSchema(BatchSchema.SelectionVectorMode.NONE);
 return output;
}

代码示例来源:origin: dremio/dremio-oss

/**
 * Merges the distinct int values of all local dictionaries (plus the
 * existing global dictionary, when present) into one sorted global
 * dictionary vector for the given column.
 */
private static VectorContainer buildIntegerGlobalDictionary(List<Dictionary> dictionaries, VectorContainer existingDict, ColumnDescriptor columnDescriptor, BufferAllocator bufferAllocator) {
 final Field field = new Field(SchemaPath.getCompoundPath(columnDescriptor.getPath()).getAsUnescapedPath(), true, new ArrowType.Int(32, true), null);
 final VectorContainer output = new VectorContainer(bufferAllocator);
 final IntVector vector = output.addOrGet(field);
 vector.allocateNew();
 // TreeSet both de-duplicates and keeps the values in ascending order.
 final SortedSet<Integer> merged = Sets.newTreeSet();
 for (Dictionary localDictionary : dictionaries) {
  final int maxId = localDictionary.getMaxId();
  for (int id = 0; id <= maxId; id++) {
   merged.add(localDictionary.decodeToInt(id));
  }
 }
 if (existingDict != null) {
  final IntVector previousValues = existingDict.getValueAccessorById(IntVector.class, 0).getValueVector();
  for (int row = 0; row < existingDict.getRecordCount(); row++) {
   merged.add(previousValues.get(row));
  }
 }
 int count = 0;
 for (int value : merged) {
  vector.setSafe(count++, value);
 }
 vector.setValueCount(count);
 output.setRecordCount(count);
 output.buildSchema(BatchSchema.SelectionVectorMode.NONE);
 return output;
}

代码示例来源:origin: apache/hive

@Override
public boolean readBoolean(int id) {
 // Direct dictionary lookup: the entry is already stored as a boolean.
 final boolean value = dict.decodeToBoolean(id);
 return value;
}

代码示例来源:origin: org.lasersonlab.apache.parquet/parquet-hadoop

for (int i = 0; i <= dict.getMaxId(); i++) {
 dictSet.add((T) dictValueProvider.apply(i));

代码示例来源:origin: org.apache.spark/spark-sql

for (int i = rowId; i < rowId + num; ++i) {
  if (!column.isNullAt(i)) {
   column.putInt(i, dictionary.decodeToInt(dictionaryIds.getDictId(i)));
 for (int i = rowId; i < rowId + num; ++i) {
  if (!column.isNullAt(i)) {
   column.putByte(i, (byte) dictionary.decodeToInt(dictionaryIds.getDictId(i)));
 for (int i = rowId; i < rowId + num; ++i) {
  if (!column.isNullAt(i)) {
   column.putShort(i, (short) dictionary.decodeToInt(dictionaryIds.getDictId(i)));
 for (int i = rowId; i < rowId + num; ++i) {
  if (!column.isNullAt(i)) {
   column.putLong(i, dictionary.decodeToLong(dictionaryIds.getDictId(i)));
  if (!column.isNullAt(i)) {
   column.putLong(i,
    DateTimeUtils.fromMillis(dictionary.decodeToLong(dictionaryIds.getDictId(i))));
for (int i = rowId; i < rowId + num; ++i) {
 if (!column.isNullAt(i)) {
  column.putFloat(i, dictionary.decodeToFloat(dictionaryIds.getDictId(i)));
for (int i = rowId; i < rowId + num; ++i) {
 if (!column.isNullAt(i)) {
  column.putDouble(i, dictionary.decodeToDouble(dictionaryIds.getDictId(i)));
  for (int i = rowId; i < rowId + num; ++i) {

代码示例来源:origin: org.apache.parquet/parquet-avro

@Override
@SuppressWarnings("unchecked")
public void setDictionary(Dictionary dictionary) {
 // Ids run from 0 through getMaxId() inclusive, so the table needs maxId+1 slots.
 final int size = dictionary.getMaxId() + 1;
 dict = (T[]) new Object[size];
 for (int id = 0; id < size; id++) {
  dict[id] = convert(dictionary.decodeToBinary(id));
 }
}

代码示例来源:origin: apache/hive

@Override
public float readFloat(int id) {
 return dict.decodeToLong(id);
}

代码示例来源:origin: apache/hive

@Override
public long readInteger(int id) {
 return dict.decodeToInt(id);
}

代码示例来源:origin: apache/hive

@Override
public byte[] readString(int id) {
 // Decode the boolean dictionary entry, then let convertToBytes render it
 // as the byte representation callers expect.
 final boolean value = dict.decodeToBoolean(id);
 return convertToBytes(value);
}

代码示例来源:origin: dremio/dremio-oss

/**
 * Dumps every entry of the given local dictionary to stdout, formatted
 * according to the column's primitive type. Unknown types are skipped.
 */
public static void printDictionary(ColumnDescriptor columnDescriptor, Dictionary localDictionary) {
  System.out.println("Dictionary for column " + columnDescriptor.toString());
  // BUG FIX: getMaxId() is the largest valid dictionary id (inclusive), so the
  // loop must use <=; the original `<` bound silently skipped the last entry.
  for (int i = 0; i <= localDictionary.getMaxId(); ++i) {
   switch (columnDescriptor.getType()) {
    case INT32:
     System.out.println(format("%d: %d", i, localDictionary.decodeToInt(i)));
     break;
    case INT64:
     System.out.println(format("%d: %d", i, localDictionary.decodeToLong(i)));
     break;
    case INT96:
    case BINARY:
    case FIXED_LEN_BYTE_ARRAY:
     // NOTE(review): new String(byte[]) uses the platform charset; Parquet
     // strings are UTF-8 — consider an explicit charset. Left unchanged here.
     System.out.println(format("%d: %s", i, new String(localDictionary.decodeToBinary(i).getBytesUnsafe())));
     break;
    case FLOAT:
     System.out.println(format("%d: %f", i, localDictionary.decodeToFloat(i)));
     break;
    case DOUBLE:
     System.out.println(format("%d: %f", i, localDictionary.decodeToDouble(i)));
     break;
    case BOOLEAN:
     System.out.println(format("%d: %b", i, localDictionary.decodeToBoolean(i)));
     break;
    default:
     break;
   }
  }
 }
}

代码示例来源:origin: apache/hive

@Override
public byte[] readDecimal(int id) {
 // Decimals are stored as binary in the dictionary; expose the raw bytes.
 final byte[] bytes = dict.decodeToBinary(id).getBytesUnsafe();
 return bytes;
}

相关文章