/**
 * A composite key made of (Text, long, Text) that sorts first on {@code textData1},
 * then {@code longData}, then {@code textData2}.
 *
 * <p>A raw byte-level {@link WritableComparator} is registered for this class so the
 * MapReduce framework can compare serialized keys during the sort/shuffle phase
 * without deserializing them.
 *
 * <p>Serialized layout (see {@link #write(DataOutput)}): Text (vint length + UTF-8
 * bytes), long (8 bytes, big-endian), Text (vint length + UTF-8 bytes).
 */
public class CompositeWritable2 implements WritableComparable<CompositeWritable2> {

    private Text textData1;
    private LongWritable longData;
    private Text textData2;

    static {
        // Register the raw comparator so the framework uses byte-level comparison
        // instead of deserialize-then-compareTo.
        WritableComparator.define(CompositeWritable2.class, new Comparator());
    }

    /**
     * Empty constructor; required by the Writable contract so the framework can
     * instantiate the key reflectively before calling {@link #readFields(DataInput)}.
     */
    public CompositeWritable2() {
        textData1 = new Text();
        longData = new LongWritable();
        textData2 = new Text();
    }

    /**
     * Raw comparator that compares serialized {@code CompositeWritable2} keys
     * field by field directly on the byte buffers.
     *
     * @author CuriousCat
     */
    public static class Comparator extends WritableComparator {

        private static final Text.Comparator TEXT_COMPARATOR = new Text.Comparator();
        private static final LongWritable.Comparator LONG_COMPARATOR = new LongWritable.Comparator();

        public Comparator() {
            super(CompositeWritable2.class);
        }

        /**
         * Compares two serialized keys held in {@code b1} and {@code b2}.
         *
         * <p>Note: {@code s1}/{@code s2} are absolute start offsets into the buffers
         * and are generally non-zero, because the framework packs many keys into a
         * shared buffer. All field offsets below are therefore computed relative to
         * {@code s1}/{@code s2}. (The original implementation passed field lengths
         * as absolute offsets, which was only correct when {@code s1 == s2 == 0}.)
         *
         * @see org.apache.hadoop.io.WritableComparator#compare(byte[], int, int, byte[], int, int)
         */
        @Override
        public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
            try {
                // Total size of the first Text field: vint length header + payload.
                int firstLen1 = WritableUtils.decodeVIntSize(b1[s1]) + readVInt(b1, s1);
                int firstLen2 = WritableUtils.decodeVIntSize(b2[s2]) + readVInt(b2, s2);

                // Compare the first Text field as raw bytes.
                int cmp = TEXT_COMPARATOR.compare(b1, s1, firstLen1, b2, s2, firstLen2);
                if (cmp != 0) {
                    return cmp;
                }

                // Absolute offsets of the long field, immediately after the first Text.
                int off1 = s1 + firstLen1;
                int off2 = s2 + firstLen2;

                // The long field is a fixed 8 bytes.
                cmp = LONG_COMPARATOR.compare(b1, off1, 8, b2, off2, 8);
                if (cmp != 0) {
                    return cmp;
                }

                // Skip the 8-byte long to reach the second Text field.
                off1 += 8;
                off2 += 8;
                int secondLen1 = WritableUtils.decodeVIntSize(b1[off1]) + readVInt(b1, off1);
                int secondLen2 = WritableUtils.decodeVIntSize(b2[off2]) + readVInt(b2, off2);

                // Compare the second Text field as raw bytes.
                return TEXT_COMPARATOR.compare(b1, off1, secondLen1, b2, off2, secondLen2);
            } catch (IOException ex) {
                throw new IllegalArgumentException("Failed in CompositeWritable's RawComparator!", ex);
            }
        }
    }

    /**
     * @return the textData1
     */
    public Text getTextData1() {
        return textData1;
    }

    /**
     * @return the longData
     */
    public LongWritable getLongData() {
        return longData;
    }

    /**
     * @return the textData2
     */
    public Text getTextData2() {
        return textData2;
    }

    /**
     * Sets all three fields at once. Stores the references directly (no copy),
     * following the usual Hadoop Writable reuse idiom.
     *
     * @param textData1 first text component
     * @param longData  numeric component
     * @param textData2 second text component
     */
    public void set(Text textData1, LongWritable longData, Text textData2) {
        this.textData1 = textData1;
        this.longData = longData;
        this.textData2 = textData2;
    }

    /**
     * Serializes the three fields in order; the layout must match what
     * {@link Comparator#compare(byte[], int, int, byte[], int, int)} assumes.
     *
     * @see org.apache.hadoop.io.Writable#write(java.io.DataOutput)
     */
    @Override
    public void write(DataOutput out) throws IOException {
        textData1.write(out);
        longData.write(out);
        textData2.write(out);
    }

    /**
     * Deserializes the three fields in the same order they were written.
     *
     * @see org.apache.hadoop.io.Writable#readFields(java.io.DataInput)
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        textData1.readFields(in);
        longData.readFields(in);
        textData2.readFields(in);
    }

    /**
     * Object-level comparison: textData1, then longData, then textData2.
     * Must be consistent with the raw {@link Comparator}.
     *
     * @see java.lang.Comparable#compareTo(java.lang.Object)
     */
    @Override
    public int compareTo(CompositeWritable2 o) {
        int cmp = textData1.compareTo(o.getTextData1());
        if (cmp != 0) {
            return cmp;
        }
        cmp = longData.compareTo(o.getLongData());
        if (cmp != 0) {
            return cmp;
        }
        return textData2.compareTo(o.getTextData2());
    }
}
2条答案
按热度按时间ulydmbyx1#
我知道我在回答一个老问题。
下面是为 WritableComparable 编写 RawComparator 的另一个示例。
fhity93d2#
RawComparator 直接对对象的字节表示进行操作。
并不是每个 MapReduce 程序都必须使用它。
MapReduce 基本上是一个批处理系统，不适合交互式分析。不能在几秒钟或更短的时间内运行查询并返回结果。查询通常需要几分钟或更长的时间，因此最好离线使用，因为处理循环中没有人在等待结果。
如果您仍然想优化 MapReduce 作业所花费的时间，那么就应该使用 RawComparator。
比较器的使用:
中间键值对会从 Mapper 传递到 Reducer。在这些键值对从 Mapper 到达 Reducer 之前，会先执行洗牌（shuffle）和排序（sort）步骤。
排序速度得到了提升，因为 RawComparator 会直接按字节比较键。如果不使用 RawComparator，则必须对中间键进行完全反序列化才能执行比较。
例子:
在上面的例子中，我们没有直接实现 RawComparator。相反，我们扩展了 WritableComparator，它在内部实现了 RawComparator。
有关更多详细信息,请参阅本文。