本文整理了Java中org.apache.kylin.common.util.Dictionary
类的一些代码示例,展示了Dictionary
类的具体用法。这些代码示例主要来源于Github
/Stackoverflow
/Maven
等平台,是从一些精选项目中提取出来的代码,具有较强的参考意义,能在一定程度上帮助到你。Dictionary
类的具体详情如下:
包路径:org.apache.kylin.common.util.Dictionary
类名称:Dictionary
[英]A two-way dictionary that maps dimension/column values to IDs and vice versa. By storing IDs instead of real values, the size of the cube is significantly reduced.
- IDs are the smallest integers possible for the cardinality of a column, for the purpose of minimal storage space.
- IDs preserve the ordering of values, so that range queries can be applied to IDs directly.
A dictionary, once built, is immutable. This allows an optimal memory footprint, e.g. by flattening the Trie structure into a byte array and replacing node pointers with array offsets.
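[中]一种双向字典,用于在维度/列的值与ID之间相互映射。通过存储ID而非真实值,可以显著减小Cube的大小。- ID取列基数所允许的最小整数,以使存储空间最小;- ID保持值的顺序,因此范围查询可以直接作用于ID。字典一旦构建即不可变,这使得内存占用可以得到优化,例如将Trie结构展平为字节数组,并用数组偏移量替换节点指针。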
代码示例来源:origin: apache/kylin
/**
 * Translates every dictionary ID in {@code value} from the old dictionary of the measure's raw
 * column into the matching ID of the new dictionary for that column.
 * <p>
 * All translated IDs are written back to back into one newly allocated buffer, and each
 * {@code ByteArray} in {@code value} is reset in place to point at its own slot of that buffer.
 *
 * @param value       the encoded IDs to translate; the elements are modified in place
 * @param measureDesc the measure whose function is used to look up the raw column
 * @param oldDicts    dictionaries the incoming IDs are decoded with, keyed by column
 * @param newDicts    dictionaries the outgoing IDs are encoded with, keyed by column
 * @return the same {@code value} list instance that was passed in
 */
@Override
public List<ByteArray> reEncodeDictionary(List<ByteArray> value, MeasureDesc measureDesc, Map<TblColRef, Dictionary<String>> oldDicts, Map<TblColRef, Dictionary<String>> newDicts) {
    final TblColRef rawColumn = getRawColumn(measureDesc.getFunction());
    final Dictionary<String> fromDict = oldDicts.get(rawColumn);
    final Dictionary<String> toDict = newDicts.get(rawColumn);

    // One contiguous buffer holds every translated ID; each entry gets its own slice of it.
    final byte[] packedIds = new byte[value.size() * toDict.getSizeOfId()];
    int writePos = 0;
    for (ByteArray entry : value) {
        final int sourceId = BytesUtil.readUnsigned(entry.array(), entry.offset(), entry.length());
        final String decoded = fromDict.getValueFromId(sourceId);
        // A null decoded value is encoded as the target dictionary's null ID.
        final int targetId = (decoded == null) ? toDict.nullId() : toDict.getIdFromValue(decoded);

        BytesUtil.writeUnsigned(targetId, packedIds, writePos, toDict.getSizeOfId());
        entry.reset(packedIds, writePos, toDict.getSizeOfId());
        writePos += toDict.getSizeOfId();
    }
    return value;
}
};
代码示例来源:origin: apache/kylin
/**
 * Collects the value of every ID from {@code getMinId()} up to and including
 * {@code getMaxId()}.
 *
 * @return a new list with one entry per ID, in ascending ID order
 */
public List<T> enumeratorValues() {
    final List<T> values = Lists.newArrayListWithExpectedSize(getSize());
    int id = getMinId();
    while (id <= getMaxId()) {
        values.add(getValueFromId(id));
        id++;
    }
    return values;
}
代码示例来源:origin: apache/kylin
/**
 * Writes a dictionary to the given stream: first the dictionary's class name (via
 * {@code writeUTF}), then whatever the dictionary's own {@code write} method emits.
 * <p>
 * The stream is flushed but not closed by this method.
 *
 * @param dict         the dictionary to write
 * @param outputStream the destination stream
 * @throws RuntimeException wrapping any {@link IOException} raised while writing
 */
public static void serialize(Dictionary<?> dict, OutputStream outputStream) {
    final DataOutputStream dataOut = new DataOutputStream(outputStream);
    try {
        final String dictClassName = dict.getClass().getName();
        dataOut.writeUTF(dictClassName);
        dict.write(dataOut);
        dataOut.flush();
    } catch (IOException ioe) {
        throw new RuntimeException(ioe);
    }
}
代码示例来源:origin: apache/kylin
/**
 * Returns the number of IDs in the inclusive range from {@code getMinId()} to
 * {@code getMaxId()}.
 */
public int getSize() {
    final int highestId = getMaxId();
    final int lowestId = getMinId();
    return highestId - lowestId + 1;
}
代码示例来源:origin: apache/kylin
/**
 * Creates a {@code ShrunkenDictionary} from the collected value-to-ID map, carrying over the
 * ID range and the ID/value sizes reported by the full dictionary.
 *
 * @param valueSerializer serializer handed to the new dictionary
 * @return the newly constructed shrunken dictionary
 */
public ShrunkenDictionary<T> build(ValueSerializer<T> valueSerializer) {
    final int lowestId = fullDict.getMinId();
    final int highestId = fullDict.getMaxId();
    final int idWidth = fullDict.getSizeOfId();
    final int valueWidth = fullDict.getSizeOfValue();
    return new ShrunkenDictionary<>(valueSerializer, lowestId, highestId, idWidth, valueWidth, valueToIdMap);
}
}
代码示例来源:origin: apache/kylin
/**
 * Tries to rewrite a built-in function filter on a dictionary-encoded column as an
 * IN (or NOT IN, when the filter is reversed) comparison over the dictionary values for which
 * the function evaluates to true.
 * <p>
 * The translation is best-effort. It yields {@code null} when the filter is not valid, when the
 * column has no dictionary, when the number of children of the translated filter exceeds the
 * configured maximum, or when evaluating the function fails.
 *
 * @param builtInFunctionTupleFilter the function filter to translate
 * @return the translated comparison filter, or {@code null} if no translation was produced
 */
private TupleFilter translateFunctionTupleFilter(BuiltInFunctionTupleFilter builtInFunctionTupleFilter) {
    if (!builtInFunctionTupleFilter.isValid()) {
        return null;
    }

    TblColRef columnRef = builtInFunctionTupleFilter.getColumn();
    Dictionary<?> dict = dimEncMap.getDictionary(columnRef);
    if (dict == null) {
        return null;
    }

    CompareTupleFilter translated = new CompareTupleFilter(builtInFunctionTupleFilter.isReversed() ? FilterOperatorEnum.NOTIN : FilterOperatorEnum.IN);
    translated.addChild(new ColumnTupleFilter(columnRef));

    try {
        int translatedInClauseMaxSize = KylinConfig.getInstanceFromEnv().getTranslatedInClauseMaxSize();
        // Evaluate the function against every dictionary value and keep the ones that match.
        for (int i = dict.getMinId(); i <= dict.getMaxId(); i++) {
            Object dictVal = dict.getValueFromId(i);
            if ((Boolean) builtInFunctionTupleFilter.invokeFunction(dictVal)) {
                translated.addChild(new ConstantTupleFilter(dictVal));
                // Give up once the translated filter grows past the configured limit.
                if (translated.getChildren().size() > translatedInClauseMaxSize) {
                    return null;
                }
            }
        }
        logger.debug("getting a in clause with {} children", translated.getChildren().size());
    } catch (Exception e) {
        // Best-effort: fall back to "no translation", but keep the full exception in the log.
        // Logging only e.getMessage() could print "null" and always lost the stack trace.
        logger.debug("Failed to translate function filter on column {} into an in clause", columnRef, e);
        return null;
    }
    return translated;
}
代码示例来源:origin: apache/kylin
/**
 * Encodes {@code value} as a dictionary ID and writes that ID to {@code buf}.
 * <p>
 * A {@code null} value is passed to the dictionary as {@code null}; any other value is looked
 * up by its {@code toString()} form, using this instance's rounding flag.
 *
 * @param value the value to encode, may be {@code null}
 * @param buf   the buffer receiving the ID
 */
@Override
public void serialize(Object value, ByteBuffer buf) {
    final String text;
    if (value == null) {
        text = null;
    } else {
        text = value.toString();
    }
    final int encodedId = dict.getIdFromValue(text, roundingFlag);
    BytesUtil.writeUnsigned(encodedId, dict.getSizeOfId(), buf);
}
代码示例来源:origin: org.apache.kylin/kylin-job
// NOTE(review): this excerpt is incomplete -- statements are elided between the lines shown,
// so the braces do not balance as printed. Comments describe only the visible lines.
// Load the merged segment's dictionary for this column via its resource path.
Dictionary<?> mergedDict = dictMgr.getDictionary(mergedCubeSegment.getDictResPath(col));
// Loop while the space left after bufOffset is smaller than the value size of either dictionary.
while (sourceDict.getSizeOfValue() > newKeyBuf.length - bufOffset || mergedDict.getSizeOfValue() > newKeyBuf.length - bufOffset) {
byte[] oldBuf = newKeyBuf;
// Replace the key buffer with a new one twice as long.
newKeyBuf = new byte[2 * newKeyBuf.length];
int idInMergedDict;
// Fetch the source value's bytes into newKeyBuf at bufOffset; the call returns their length.
int size = sourceDict.getValueBytesFromId(idInSourceDict, newKeyBuf, bufOffset);
// A negative size is mapped to the merged dictionary's null ID.
if (size < 0) {
idInMergedDict = mergedDict.nullId();
} else {
// Look the value bytes up in the merged dictionary to obtain the new ID.
idInMergedDict = mergedDict.getIdFromValueBytes(newKeyBuf, bufOffset, size);
// Write the merged ID at bufOffset, then advance by the merged dictionary's ID width.
BytesUtil.writeUnsigned(idInMergedDict, newKeyBuf, bufOffset, mergedDict.getSizeOfId());
bufOffset += mergedDict.getSizeOfId();
} else {
代码示例来源:origin: apache/kylin
/**
 * Tells whether {@code id} has every bit set that is set in the {@code NULL_ID} entry for this
 * dictionary's ID size.
 *
 * @param id the ID to test
 * @return {@code true} if all bits of that {@code NULL_ID} entry are present in {@code id}
 */
public boolean isNullId(int id) {
    final int nullMask = NULL_ID[getSizeOfId()];
    final int overlap = nullMask & id;
    return overlap == nullMask;
}
代码示例来源:origin: apache/kylin
/**
 * Shorthand for {@code getIdFromValue(value, 0)}, i.e. a lookup with a rounding flag of zero.
 *
 * @param value the value to look up
 * @return the ID returned by {@code getIdFromValue(value, 0)}
 * @throws IllegalArgumentException as thrown by {@code getIdFromValue(value, 0)}
 */
public final int getIdFromValue(T value) throws IllegalArgumentException {
    final int noRounding = 0;
    return getIdFromValue(value, noRounding);
}
代码示例来源:origin: apache/kylin
/**
 * Reads one dictionary ID from {@code in}, using the dictionary's ID size, and returns the
 * value the dictionary holds for that ID.
 *
 * @param in the buffer to read the ID from
 * @return the dictionary value for the ID that was read
 */
@Override
public Object deserialize(ByteBuffer in) {
    final int idWidth = dict.getSizeOfId();
    final int encodedId = BytesUtil.readUnsigned(in, idWidth);
    return dict.getValueFromId(encodedId);
}
代码示例来源:origin: apache/kylin
// NOTE(review): excerpt only -- the enclosing method, loop and declarations are not shown.
buf.append(", ");
// Append the value s and the ID the dictionary assigns to it, formatted as "value=>id".
buf.append(s.toString()).append("=>").append(dict.getIdFromValue(s));
// Debug-log the dictionary's size and the runtime class names of the builder and the dictionary.
logger.debug("Dictionary cardinality: " + dict.getSize());
logger.debug("Dictionary builder class: " + builder.getClass().getName());
logger.debug("Dictionary class: " + dict.getClass().getName());
代码示例来源:origin: apache/kylin
// NOTE(review): excerpt only -- the declarations of bos, dis, dShrunkenDict and value are not shown.
DataOutputStream dos = new DataOutputStream(bos);
// Write the shrunken dictionary to the output stream.
shrunkenDict.write(dos);
// Populate dShrunkenDict by reading from dis.
dShrunkenDict.readFields(dis);
// The original dictionary and the read-back one must give the same ID for the value.
Assert.assertEquals(dict.getIdFromValue(value), dShrunkenDict.getIdFromValue(value));
代码示例来源:origin: apache/kylin
// NOTE(review): excerpt only -- the enclosing method and the closing brace of the loop are not shown.
// Load the dictionary's contents from the input.
dict.readFields(in);
int[] rowIndex = new int[n];
for (int i = 0; i < n; i++) {
// Store the dictionary ID of each cell of row at the same position in rowIndex.
rowIndex[i] = dict.getIdFromValue(row[i]);
代码示例来源:origin: apache/kylin
/**
 * Decodes the row at position {@code i} of {@code rowIndices} by looking each stored ID up in
 * the dictionary.
 *
 * @return a new array holding the dictionary value of every ID in that row, in order
 */
@Override
public String[] getRow() {
    final int[] encodedRow = rowIndices.get(i);
    final String[] decodedRow = new String[encodedRow.length];
    int pos = 0;
    while (pos < decodedRow.length) {
        decodedRow[pos] = dict.getValueFromId(encodedRow[pos]);
        pos++;
    }
    return decodedRow;
}
代码示例来源:origin: apache/kylin
/**
 * Registers a dictionary on a cube segment and submits the change through {@code updateCube}.
 * <p>
 * The dictionary's resource path is stored for the column, and a row of
 * {column identity, dictionary size, ID size} is appended to the segment's rowkey stats. All
 * changes are applied to a copy of the cube obtained for writing, not to {@code cubeSeg} itself.
 * Does nothing when {@code dictInfo} is {@code null}.
 *
 * @param cubeSeg  the segment the dictionary belongs to
 * @param col      the column the dictionary was built for
 * @param dictInfo the dictionary info to record, may be {@code null}
 * @throws IOException declared by this method; presumably propagated from {@code updateCube}
 */
private void saveDictionaryInfo(CubeSegment cubeSeg, TblColRef col, DictionaryInfo dictInfo)
        throws IOException {
    if (dictInfo == null) {
        return;
    }

    // Work on the latest writable copy rather than on the cached objects.
    final CubeInstance writableCube = cubeSeg.getCubeInstance().latestCopyForWrite();
    final CubeSegment writableSeg = writableCube.getSegmentById(cubeSeg.getUuid());
    final Dictionary<?> builtDict = dictInfo.getDictionaryObject();

    writableSeg.putDictResPath(col, dictInfo.getResourcePath());
    writableSeg.getRowkeyStats().add(new Object[] { col.getIdentity(), builtDict.getSize(), builtDict.getSizeOfId() });

    final CubeUpdate pendingUpdate = new CubeUpdate(writableCube);
    pendingUpdate.setToUpdateSegs(writableSeg);
    updateCube(pendingUpdate);
}
代码示例来源:origin: apache/kylin
/**
 * Returns the ID integer of the given value. When the value is not found:
 * <p>
 * - if roundingFlag=0, throw IllegalArgumentException; <br>
 * - if roundingFlag&lt;0, the closest smaller ID integer if it exists; <br>
 * - if roundingFlag&gt;0, the closest bigger ID integer if it exists. <br>
 * <p>
 * A value in null object form yields {@code nullId()} without any lookup.
 * <p>
 * The implementation often has a cache, and is thus faster than the byte[] version
 * getIdFromValueBytes().
 *
 * @throws IllegalArgumentException
 *             if value is not found in dictionary and rounding is off;
 *             or if rounding cannot find a smaller or bigger ID
 */
public final int getIdFromValue(T value, int roundingFlag) throws IllegalArgumentException {
    if (isNullObjectForm(value)) {
        return nullId();
    }
    final int resolvedId = getIdFromValueImpl(value, roundingFlag);
    // -1 is the implementation's "not found" result.
    if (resolvedId != -1) {
        return resolvedId;
    }
    throw new IllegalArgumentException("Value : " + value + " not exists");
}
代码示例来源:origin: apache/kylin
/**
 * Writes the table rows to {@code out}.
 * <p>
 * Layout: the row count; then, only if there is at least one row, the column count taken from
 * the first row, followed by either
 * <ul>
 * <li>the dictionary and every cell as its int ID (when {@code useDictionary} is set), or</li>
 * <li>every cell as a UTF string decoded through the dictionary, with {@code NULL_STR}
 * standing in for {@code null} values.</li>
 * </ul>
 *
 * @param out the destination to write to
 * @throws IOException if writing to {@code out} fails
 */
void writeData(DataOutput out) throws IOException {
    out.writeInt(rowIndices.size());
    if (rowIndices.isEmpty()) {
        return;
    }

    final int n = rowIndices.get(0).length;
    out.writeInt(n);

    if (this.useDictionary) {
        dict.write(out);
        for (int[] row : rowIndices) {
            for (int j = 0; j < n; j++) {
                out.writeInt(row[j]);
            }
        }
    } else {
        for (int[] row : rowIndices) {
            for (int j = 0; j < n; j++) {
                // Decode each cell once instead of once for the null check and once for the write.
                final String cell = dict.getValueFromId(row[j]);
                // NULL_STR is tricky, but we don't want to break the current snapshots
                out.writeUTF(cell == null ? NULL_STR : cell);
            }
        }
    }
}
代码示例来源:origin: apache/kylin
/**
 * Advances to the next value across all dictionaries in {@code dictionaryList}.
 * <p>
 * Each dictionary has its own cursor in {@code curKeys}. The value under each non-exhausted
 * cursor is compared using {@code dataType}, the smallest becomes {@code curValue}, and the
 * cursor of the dictionary that supplied it is moved forward by one.
 *
 * @return {@code true} if a value was selected; {@code false} (with {@code curValue} set to
 *         {@code null}) if no candidate was found
 */
@Override
public boolean moveNext() throws IOException {
    String smallest = null;
    int smallestOwner = 0;

    // multi-merge dictionary forest
    for (int idx = 0; idx < dictionaryList.size(); idx++) {
        final Dictionary<String> candidateDict = dictionaryList.get(idx);
        if (candidateDict == null) {
            continue;
        }

        final int cursor = curKeys.get(idx);
        // A cursor past the max ID means this dictionary is exhausted.
        if (cursor > candidateDict.getMaxId()) {
            continue;
        }

        final String candidate = candidateDict.getValueFromId(cursor);
        if (smallest == null || dataType.compare(smallest, candidate) > 0) {
            smallest = candidate;
            smallestOwner = idx;
        }
    }

    if (smallest == null) {
        curValue = null;
        return false;
    }

    curValue = smallest;
    curKeys.set(smallestOwner, curKeys.get(smallestOwner) + 1);
    return true;
}
代码示例来源:origin: org.apache.kylin/kylin-dictionary
/**
 * Advances to the next value, walking the dictionaries in {@code dictionaryList} one after
 * another and, within each, the IDs in ascending order.
 * <p>
 * On success {@code curValue} holds a copy of the current value's bytes and the cursor is moved
 * forward, switching to the next dictionary's min ID once the current one is exhausted.
 *
 * @return {@code true} if a value was read; {@code false} (with {@code curValue} set to
 *         {@code null}) otherwise
 */
@Override
public boolean moveNext() throws IOException {
    if (curDictIndex >= dictionaryList.size() || curKey > curDict.getMaxId()) {
        curValue = null;
        return false;
    }

    final byte[] scratch = new byte[curDict.getSizeOfValue()];
    final int length = curDict.getValueBytesFromId(curKey, scratch, 0);
    curValue = Bytes.copy(scratch, 0, length);

    curKey++;
    if (curKey > curDict.getMaxId()) {
        // Current dictionary is exhausted; move to the next one if there is any.
        curDictIndex++;
        if (curDictIndex < dictionaryList.size()) {
            curDict = dictionaryList.get(curDictIndex);
            curKey = curDict.getMinId();
        }
    }
    return true;
}
内容来源于网络,如有侵权,请联系作者删除!