本文整理了Java中org.apache.parquet.column.Dictionary.getMaxId()
方法的一些代码示例,展示了Dictionary.getMaxId()
的具体用法。这些代码示例主要来源于Github
/Stackoverflow
/Maven
等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。Dictionary.getMaxId()
方法的具体详情如下:
包路径:org.apache.parquet.column.Dictionary
类名称:Dictionary
方法名:getMaxId
暂无
代码示例来源:origin: apache/hive
/**
 * Rebuilds {@code lookupTable} from the given dictionary.
 *
 * <p>After this call, element {@code i} of {@code lookupTable} is
 * {@code convert(dictionary.decodeToBinary(i))} for every id from 0 through
 * {@code dictionary.getMaxId()} inclusive.
 *
 * @param dictionary the dictionary whose binary entries are converted
 */
@Override
public void setDictionary(Dictionary dictionary) {
  int length = dictionary.getMaxId() + 1;
  // The entry count is known up front, so presize the list to avoid regrowth.
  // Clamp at 0 so an unexpected negative count still yields an empty list.
  lookupTable = new ArrayList<T>(Math.max(length, 0));
  for (int i = 0; i < length; i++) {
    lookupTable.add(convert(dictionary.decodeToBinary(i)));
  }
}
代码示例来源:origin: org.apache.parquet/parquet-avro
/**
 * Caches the converted form of every dictionary entry in {@code dict}.
 *
 * <p>Slot {@code id} of the array holds {@code convert(dictionary.decodeToBinary(id))}
 * for each id from 0 through {@code dictionary.getMaxId()} inclusive.
 *
 * @param dictionary the dictionary whose binary entries are converted
 */
@Override
@SuppressWarnings("unchecked")
public void setDictionary(Dictionary dictionary) {
  final int entryCount = dictionary.getMaxId() + 1;
  // Generic array creation is not allowed, hence the Object[] cast.
  dict = (T[]) new Object[entryCount];
  for (int id = 0; id < entryCount; id++) {
    dict[id] = convert(dictionary.decodeToBinary(id));
  }
}
代码示例来源:origin: org.apache.parquet/parquet-protobuf
/**
 * Translates every dictionary entry into an enum value descriptor and caches it in {@code dict}.
 *
 * <p>Slot {@code id} of the array holds {@code translateEnumValue(dictionary.decodeToBinary(id))}
 * for each id from 0 through {@code dictionary.getMaxId()} inclusive.
 *
 * @param dictionary the dictionary whose binary entries are translated
 */
@Override
public void setDictionary(Dictionary dictionary) {
  final int entryCount = dictionary.getMaxId() + 1;
  dict = new Descriptors.EnumValueDescriptor[entryCount];
  for (int id = 0; id < entryCount; id++) {
    dict[id] = translateEnumValue(dictionary.decodeToBinary(id));
  }
}
代码示例来源:origin: ai.h2o/h2o-parquet-parser
/**
 * Decodes every dictionary entry to a UTF-8 string and caches it in {@code _dict}.
 *
 * <p>Slot {@code id} of the array holds
 * {@code dictionary.decodeToBinary(id).toStringUsingUTF8()} for each id from 0 through
 * {@code dictionary.getMaxId()} inclusive.
 *
 * @param dictionary the dictionary whose binary entries are decoded
 */
@Override
public void setDictionary(Dictionary dictionary) {
  final int entryCount = dictionary.getMaxId() + 1;
  _dict = new String[entryCount];
  int id = 0;
  while (id < entryCount) {
    _dict[id] = dictionary.decodeToBinary(id).toStringUsingUTF8();
    id++;
  }
}
代码示例来源:origin: dremio/dremio-oss
/**
 * Prints every entry of a column's dictionary to standard output, one {@code id: value} line
 * per entry, preceded by a header line naming the column.
 *
 * <p>The value is decoded according to the column's primitive type. INT96, BINARY and
 * FIXED_LEN_BYTE_ARRAY entries are printed as a string built from the raw bytes; entries of
 * any other unlisted type are silently skipped.
 *
 * @param columnDescriptor the column the dictionary belongs to; its type selects the decoder
 * @param localDictionary the dictionary whose entries are printed
 */
public static void printDictionary(ColumnDescriptor columnDescriptor, Dictionary localDictionary) {
  System.out.println("Dictionary for column " + columnDescriptor.toString());
  // Ids run from 0 through getMaxId() inclusive; the previous "<" bound dropped the last entry.
  for (int i = 0; i <= localDictionary.getMaxId(); ++i) {
    switch (columnDescriptor.getType()) {
      case INT32:
        System.out.println(format("%d: %d", i, localDictionary.decodeToInt(i)));
        break;
      case INT64:
        System.out.println(format("%d: %d", i, localDictionary.decodeToLong(i)));
        break;
      case INT96:
      case BINARY:
      case FIXED_LEN_BYTE_ARRAY:
        // NOTE(review): the bytes are decoded with the platform default charset — confirm
        // whether an explicit charset is wanted here.
        System.out.println(format("%d: %s", i, new String(localDictionary.decodeToBinary(i).getBytesUnsafe())));
        break;
      case FLOAT:
        System.out.println(format("%d: %f", i, localDictionary.decodeToFloat(i)));
        break;
      case DOUBLE:
        System.out.println(format("%d: %f", i, localDictionary.decodeToDouble(i)));
        break;
      case BOOLEAN:
        System.out.println(format("%d: %b", i, localDictionary.decodeToBoolean(i)));
        break;
      default:
        break;
    }
  }
}
}
代码示例来源:origin: Netflix/iceberg
for (int i=0; i<=dict.getMaxId(); i++) {
switch (col.getType()) {
case BINARY: dictSet.add((T) conversion.apply(dict.decodeToBinary(i)));
代码示例来源:origin: dremio/dremio-oss
localIdToGlobalId = new int[pageReader.dictionary.getMaxId() + 1];
final VectorContainer vectorContainer = globalDictionaries.getDictionaries().get(schemaElement.getName());
switch (schemaElement.getType()) {
valueLookup.put(intVector.get(i), i);
for (int i = 0; i <= pageReader.dictionary.getMaxId(); ++i) {
localIdToGlobalId[i] = valueLookup.get(pageReader.dictionary.decodeToInt(i));
valueLookup.put(longVector.get(i), i);
for (int i = 0; i <= pageReader.dictionary.getMaxId(); ++i) {
localIdToGlobalId[i] = valueLookup.get(pageReader.dictionary.decodeToLong(i));
for (int i = 0; i <= pageReader.dictionary.getMaxId(); ++i) {
localIdToGlobalId[i] = valueLookup.get(pageReader.dictionary.decodeToBinary(i));
for (int i = 0; i <= pageReader.dictionary.getMaxId(); ++i) {
localIdToGlobalId[i] = valueLookup.get(pageReader.dictionary.decodeToFloat(i));
for (int i = 0; i <= pageReader.dictionary.getMaxId(); ++i) {
localIdToGlobalId[i] = valueLookup.get(pageReader.dictionary.decodeToDouble(i));
for (int i = 0; i <= pageReader.dictionary.getMaxId(); ++i) {
localIdToGlobalId[i] = pageReader.dictionary.decodeToBoolean(i) ? 1 : 0;
this.dictionaryWidthBits = BytesUtils.getWidthFromMaxInt(pageReader.dictionary.getMaxId() - 1);
代码示例来源:origin: org.apache.parquet/parquet-hadoop
/**
 * Reads the dictionary page of a column chunk and returns the set of distinct values it holds.
 *
 * <p>BINARY, INT32, INT64, FLOAT and DOUBLE columns are supported. For any other type the
 * dictionary cannot be decoded here, so a single warning is logged and {@code null} is
 * returned instead of an empty set, which would wrongly state that the dictionary holds no
 * values.
 *
 * @param meta metadata of the column chunk whose dictionary is expanded
 * @param <T> the comparable value type of the column
 * @return the decoded dictionary values, or {@code null} when the chunk has no dictionary page
 *     or its type is not supported
 * @throws IOException declared for failures while reading the dictionary page
 */
@SuppressWarnings("unchecked")
private <T extends Comparable<T>> Set<T> expandDictionary(ColumnChunkMetaData meta) throws IOException {
  ColumnDescriptor col = new ColumnDescriptor(meta.getPath().toArray(), meta.getPrimitiveType(), -1, -1);
  DictionaryPage page = dictionaries.readDictionaryPage(col);

  // the chunk may not be dictionary-encoded
  if (page == null) {
    return null;
  }

  Dictionary dict = page.getEncoding().initDictionary(col, page);

  // The decoded element type depends on the runtime column type, so collect as Object
  // and cast once at the end.
  Set<Object> dictSet = new HashSet<Object>();

  for (int i = 0; i <= dict.getMaxId(); i++) {
    switch (meta.getType()) {
      case BINARY:
        dictSet.add(dict.decodeToBinary(i));
        break;
      case INT32:
        dictSet.add(dict.decodeToInt(i));
        break;
      case INT64:
        dictSet.add(dict.decodeToLong(i));
        break;
      case FLOAT:
        dictSet.add(dict.decodeToFloat(i));
        break;
      case DOUBLE:
        dictSet.add(dict.decodeToDouble(i));
        break;
      default:
        // Unsupported type: warn once and report "unknown" rather than an empty dictionary.
        LOG.warn("Unknown dictionary type {}", meta.getType());
        return null;
    }
  }

  return (Set<T>) (Set<?>) dictSet;
}
代码示例来源:origin: org.lasersonlab.apache.parquet/parquet-hadoop
for (int i = 0; i <= dict.getMaxId(); i++) {
dictSet.add((T) dictValueProvider.apply(i));
代码示例来源:origin: dremio/dremio-oss
/**
 * Builds a global dictionary of 64-bit integers for a column.
 *
 * <p>Collects every value of the given dictionaries, plus the values of the first vector of
 * {@code existingDict} when it is non-null, into a sorted set and writes them in ascending
 * order, without duplicates, into a single nullable signed Int(64) vector.
 *
 * @param dictionaries dictionaries whose entries are decoded as longs
 * @param existingDict previously built global dictionary to merge in, or {@code null}
 * @param columnDescriptor the column; its path names the output field
 * @param bufferAllocator allocator backing the returned container
 * @return a container holding the merged values, with its schema built
 */
private static VectorContainer buildLongGlobalDictionary(List<Dictionary> dictionaries, VectorContainer existingDict, ColumnDescriptor columnDescriptor, BufferAllocator bufferAllocator) {
  final ArrowType.Int int64Type = new ArrowType.Int(64, true);
  final Field columnField = new Field(SchemaPath.getCompoundPath(columnDescriptor.getPath()).getAsUnescapedPath(), true, int64Type, null);
  final VectorContainer container = new VectorContainer(bufferAllocator);
  final BigIntVector globalValues = container.addOrGet(columnField);
  globalValues.allocateNew();

  // The sorted set both orders and de-duplicates the merged values.
  final SortedSet<Long> merged = Sets.newTreeSet();
  for (Dictionary local : dictionaries) {
    for (int id = 0; id <= local.getMaxId(); ++id) {
      merged.add(local.decodeToLong(id));
    }
  }
  if (existingDict != null) {
    final BigIntVector previous = existingDict.getValueAccessorById(BigIntVector.class, 0).getValueVector();
    for (int row = 0; row < existingDict.getRecordCount(); ++row) {
      merged.add(previous.get(row));
    }
  }

  int written = 0;
  for (Long value : merged) {
    globalValues.setSafe(written++, value);
  }
  globalValues.setValueCount(written);
  container.setRecordCount(written);
  container.buildSchema(BatchSchema.SelectionVectorMode.NONE);
  return container;
}
代码示例来源:origin: dremio/dremio-oss
/**
 * Builds a global dictionary of 32-bit integers for a column.
 *
 * <p>Collects every value of the given dictionaries, plus the values of the first vector of
 * {@code existingDict} when it is non-null, into a sorted set and writes them in ascending
 * order, without duplicates, into a single nullable signed Int(32) vector.
 *
 * @param dictionaries dictionaries whose entries are decoded as ints
 * @param existingDict previously built global dictionary to merge in, or {@code null}
 * @param columnDescriptor the column; its path names the output field
 * @param bufferAllocator allocator backing the returned container
 * @return a container holding the merged values, with its schema built
 */
private static VectorContainer buildIntegerGlobalDictionary(List<Dictionary> dictionaries, VectorContainer existingDict, ColumnDescriptor columnDescriptor, BufferAllocator bufferAllocator) {
  final ArrowType.Int int32Type = new ArrowType.Int(32, true);
  final Field columnField = new Field(SchemaPath.getCompoundPath(columnDescriptor.getPath()).getAsUnescapedPath(), true, int32Type, null);
  final VectorContainer container = new VectorContainer(bufferAllocator);
  final IntVector globalValues = container.addOrGet(columnField);
  globalValues.allocateNew();

  // The sorted set both orders and de-duplicates the merged values.
  final SortedSet<Integer> merged = Sets.newTreeSet();
  for (Dictionary local : dictionaries) {
    for (int id = 0; id <= local.getMaxId(); ++id) {
      merged.add(local.decodeToInt(id));
    }
  }
  if (existingDict != null) {
    final IntVector previous = existingDict.getValueAccessorById(IntVector.class, 0).getValueVector();
    for (int row = 0; row < existingDict.getRecordCount(); ++row) {
      merged.add(previous.get(row));
    }
  }

  int written = 0;
  for (Integer value : merged) {
    globalValues.setSafe(written++, value);
  }
  globalValues.setValueCount(written);
  container.setRecordCount(written);
  container.buildSchema(BatchSchema.SelectionVectorMode.NONE);
  return container;
}
代码示例来源:origin: dremio/dremio-oss
/**
 * Builds a global dictionary of single-precision floats for a column.
 *
 * <p>Collects every value of the given dictionaries, plus the values of the first vector of
 * {@code existingDict} when it is non-null, into a sorted set and writes them in ascending
 * order, without duplicates, into a single nullable single-precision floating-point vector.
 *
 * @param dictionaries dictionaries whose entries are decoded as floats
 * @param existingDict previously built global dictionary to merge in, or {@code null}
 * @param columnDescriptor the column; its path names the output field
 * @param bufferAllocator allocator backing the returned container
 * @return a container holding the merged values, with its schema built
 */
private static VectorContainer buildFloatGlobalDictionary(List<Dictionary> dictionaries, VectorContainer existingDict, ColumnDescriptor columnDescriptor, BufferAllocator bufferAllocator) {
  final ArrowType.FloatingPoint float32Type = new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE);
  final Field columnField = new Field(SchemaPath.getCompoundPath(columnDescriptor.getPath()).getAsUnescapedPath(), true, float32Type, null);
  final VectorContainer container = new VectorContainer(bufferAllocator);
  final Float4Vector globalValues = container.addOrGet(columnField);
  globalValues.allocateNew();

  // The sorted set both orders and de-duplicates the merged values.
  final SortedSet<Float> merged = Sets.newTreeSet();
  for (Dictionary local : dictionaries) {
    for (int id = 0; id <= local.getMaxId(); ++id) {
      merged.add(local.decodeToFloat(id));
    }
  }
  if (existingDict != null) {
    final Float4Vector previous = existingDict.getValueAccessorById(Float4Vector.class, 0).getValueVector();
    for (int row = 0; row < existingDict.getRecordCount(); ++row) {
      merged.add(previous.get(row));
    }
  }

  int written = 0;
  for (Float value : merged) {
    globalValues.setSafe(written++, value);
  }
  globalValues.setValueCount(written);
  container.setRecordCount(written);
  container.buildSchema(BatchSchema.SelectionVectorMode.NONE);
  return container;
}
代码示例来源:origin: dremio/dremio-oss
/**
 * Builds a global dictionary of double-precision floats for a column.
 *
 * <p>Collects every value of the given dictionaries, plus the values of the first vector of
 * {@code existingDict} when it is non-null, into a sorted set and writes them in ascending
 * order, without duplicates, into a single nullable double-precision floating-point vector.
 *
 * @param dictionaries dictionaries whose entries are decoded as doubles
 * @param existingDict previously built global dictionary to merge in, or {@code null}
 * @param columnDescriptor the column; its path names the output field
 * @param bufferAllocator allocator backing the returned container
 * @return a container holding the merged values, with its schema built
 */
private static VectorContainer buildDoubleGlobalDictionary(List<Dictionary> dictionaries, VectorContainer existingDict, ColumnDescriptor columnDescriptor, BufferAllocator bufferAllocator) {
  final ArrowType.FloatingPoint float64Type = new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE);
  final Field columnField = new Field(SchemaPath.getCompoundPath(columnDescriptor.getPath()).getAsUnescapedPath(), true, float64Type, null);
  final VectorContainer container = new VectorContainer(bufferAllocator);
  final Float8Vector globalValues = container.addOrGet(columnField);
  globalValues.allocateNew();

  // The sorted set both orders and de-duplicates the merged values.
  final SortedSet<Double> merged = Sets.newTreeSet();
  for (Dictionary local : dictionaries) {
    for (int id = 0; id <= local.getMaxId(); ++id) {
      merged.add(local.decodeToDouble(id));
    }
  }
  if (existingDict != null) {
    final Float8Vector previous = existingDict.getValueAccessorById(Float8Vector.class, 0).getValueVector();
    for (int row = 0; row < existingDict.getRecordCount(); ++row) {
      merged.add(previous.get(row));
    }
  }

  int written = 0;
  for (Double value : merged) {
    globalValues.setSafe(written++, value);
  }
  globalValues.setValueCount(written);
  container.setRecordCount(written);
  container.buildSchema(BatchSchema.SelectionVectorMode.NONE);
  return container;
}
代码示例来源:origin: dremio/dremio-oss
/**
 * Builds a global dictionary of binary values for a column.
 *
 * <p>Collects every value of the given dictionaries, plus the values of the first vector of
 * {@code existingDict} when it is non-null, into a sorted set and writes the bytes of each
 * distinct value, in the set's order, into a single nullable binary vector.
 *
 * @param dictionaries dictionaries whose entries are decoded as binaries
 * @param existingDict previously built global dictionary to merge in, or {@code null}
 * @param columnDescriptor the column; its path names the output field
 * @param bufferAllocator allocator backing the returned container
 * @return a container holding the merged values, with its schema built
 */
private static VectorContainer buildBinaryGlobalDictionary(List<Dictionary> dictionaries, VectorContainer existingDict, ColumnDescriptor columnDescriptor, BufferAllocator bufferAllocator) {
  final ArrowType.Binary binaryType = new ArrowType.Binary();
  final Field columnField = new Field(SchemaPath.getCompoundPath(columnDescriptor.getPath()).getAsUnescapedPath(), true, binaryType, null);
  final VectorContainer container = new VectorContainer(bufferAllocator);
  final VarBinaryVector globalValues = container.addOrGet(columnField);
  globalValues.allocateNew();

  // The sorted set both orders and de-duplicates the merged values.
  final SortedSet<Binary> merged = new TreeSet<>();
  for (Dictionary local : dictionaries) {
    for (int id = 0; id <= local.getMaxId(); ++id) {
      merged.add(local.decodeToBinary(id));
    }
  }
  if (existingDict != null) {
    final VarBinaryVector previous = existingDict.getValueAccessorById(VarBinaryVector.class, 0).getValueVector();
    for (int row = 0; row < existingDict.getRecordCount(); ++row) {
      merged.add(Binary.fromConstantByteArray(previous.get(row)));
    }
  }

  int written = 0;
  for (Binary value : merged) {
    final byte[] bytes = value.getBytes();
    globalValues.setSafe(written++, bytes, 0, bytes.length);
  }
  globalValues.setValueCount(written);
  container.setRecordCount(written);
  container.buildSchema(BatchSchema.SelectionVectorMode.NONE);
  return container;
}
内容来源于网络,如有侵权,请联系作者删除!