hadoop mapreduce仅按值排序

beq87vna  于 2021-05-30  发布在  Hadoop
关注(0)|答案(3)|浏览(324)

有没有办法只按值对mapreduce输出进行排序,而不改变key和value的输出顺序?
原始输出如下(按键排序):

A 1

B 2

C 1

D 3

我需要这样的输出(按值排序):

D 3

B 2

A 1

C 1

我尝试添加另一个排序作业,用 InverseMapper 交换键和值,以便按值对输出进行排序。这是可行的,但输出如下所示:

3 D

2 B

1 A

1 C

是否有反转键和值的输出格式的方法?
或者有没有其他方法只按值排序?
谢谢

txu3uszq

txu3uszq1#

可以使用辅助排序(secondary sort)根据值进行排序。定义复合键并重写排序比较器(sort comparator),使其根据值进行排序。这样在reducer中就能得到已排序的值。

70gysomp

70gysomp2#

MapReduce总是按键排序。如果要按值排序,则需要再创建一个作业,把要排序的值映射为键。

r8uurelv

r8uurelv3#

您可以使用自定义值类,让它实现WritableComparable接口并实现compareTo()方法;也可以继承WritableComparator类并重写compare()方法。两种方式任选其一。下面给出自定义键类和自定义值类。
CustKey.java

package in.aniruddha.mapreduce.custFormat;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;

public class CustKey implements Writable {
    protected Text customerId;
    //default constructor
    public CustKey()
    {
        super();
        customerId=new Text();
    }
    public CustKey(Text customerId)
    {
        super();
        this.customerId=customerId;
    }
    public CustKey(String customerId)
    {
        super();
        this.customerId=new Text(customerId);
    }
    public CustKey(CustKey k)
    {
        super();
        this.customerId=k.customerId;
    }
    /**
     * @return the customerId
     */
    public Text getCustomerId() {
        return customerId;
    }
    /**
     * @param customerId the customerId to set
     */
    public void setCustomerId(Text customerId) {
        this.customerId = customerId;
    }
    public void setCustomerId(String customerId) {
        this.customerId = new Text(customerId);
    }

    public void readFields(DataInput arg0) throws IOException {
        this.customerId.readFields(arg0);
    }

    public void write(DataOutput arg0) throws IOException {
        this.customerId.write(arg0);
    }

    /* (non-Javadoc)
     * @see java.lang.Object#hashCode()
     */
    @Override
    public int hashCode() {
        final int prime = 31;
        int result = 1;
        result = prime * result
                + ((customerId == null) ? 0 : customerId.hashCode());
        return result;
    }
    /* (non-Javadoc)
     * @see java.lang.Object#equals(java.lang.Object)
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj)
            return true;
        if (obj == null)
            return false;
        if (getClass() != obj.getClass())
            return false;
        CustKey other = (CustKey) obj;
        if (customerId == null) {
            if (other.customerId != null)
                return false;
        } else if (!customerId.equals(other.customerId))
            return false;
        return true;
    }
}

类似地,自定义值类CustValue.java

package in.aniruddha.mapreduce.custFormat;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;

/**
 * Custom Hadoop value holding a customer record: the customer key plus first
 * name, last name, age and profession, all stored as {@link Text}.
 *
 * <p>Instances are ordered by customer id and then by age (see
 * {@link #compareTo(CustValue)}).
 */
public class CustValue implements WritableComparable<CustValue> {
    protected CustKey custId;
    protected Text firstName, lastName, age, profession;

    /** Creates a record with empty fields (required for deserialization). */
    public CustValue() {
        custId = new CustKey();
        firstName = new Text();
        lastName = new Text();
        age = new Text();
        profession = new Text();
    }

    /**
     * Creates a record from existing Hadoop types.
     *
     * @param custId     the customer key; passed through the {@code CustKey}
     *                   copy constructor
     * @param firstName  the first name; stored by reference
     * @param lastName   the last name; stored by reference
     * @param age        the age; stored by reference
     * @param profession the profession; stored by reference
     */
    public CustValue(CustKey custId, Text firstName, Text lastName, Text age,
            Text profession) {
        this.custId = new CustKey(custId);
        this.firstName = firstName;
        this.lastName = lastName;
        this.age = age;
        this.profession = profession;
    }

    /**
     * Creates a record from plain strings, wrapping each in a new object.
     *
     * @param custId     the customer id
     * @param firstName  the first name
     * @param lastName   the last name
     * @param age        the age
     * @param profession the profession
     */
    public CustValue(String custId, String firstName, String lastName, String age,
            String profession) {
        this.custId = new CustKey(custId);
        this.firstName = new Text(firstName);
        this.lastName = new Text(lastName);
        this.age = new Text(age);
        this.profession = new Text(profession);
    }

    /**
     * @return the custId
     */
    public CustKey getCustId() {
        return custId;
    }

    /**
     * @param custId the custId to set
     */
    public void setCustId(CustKey custId) {
        this.custId = custId;
    }

    /**
     * @return the firstName
     */
    public Text getFirstName() {
        return firstName;
    }

    /**
     * @param firstName the firstName to set
     */
    public void setFirstName(Text firstName) {
        this.firstName = firstName;
    }

    /**
     * @return the lastName
     */
    public Text getLastName() {
        return lastName;
    }

    /**
     * @param lastName the lastName to set
     */
    public void setLastName(Text lastName) {
        this.lastName = lastName;
    }

    /**
     * @return the age
     */
    public Text getAge() {
        return age;
    }

    /**
     * @param age the age to set
     */
    public void setAge(Text age) {
        this.age = age;
    }

    /**
     * @return the profession
     */
    public Text getProfession() {
        return profession;
    }

    /**
     * @param profession the profession to set
     */
    public void setProfession(Text profession) {
        this.profession = profession;
    }

    /**
     * Reads all fields from {@code arg0}. The field order must mirror
     * {@link #write(DataOutput)}: custId, age, profession, lastName, firstName.
     *
     * @param arg0 the input to deserialize from
     * @throws IOException if reading fails
     */
    @Override
    public void readFields(DataInput arg0) throws IOException {
        this.custId.readFields(arg0);
        this.age.readFields(arg0);
        this.profession.readFields(arg0);
        this.lastName.readFields(arg0);
        this.firstName.readFields(arg0);
    }

    /**
     * Writes all fields to {@code arg0}. The field order must mirror
     * {@link #readFields(DataInput)}: custId, age, profession, lastName,
     * firstName.
     *
     * @param arg0 the output to serialize to
     * @throws IOException if writing fails
     */
    @Override
    public void write(DataOutput arg0) throws IOException {
        this.custId.write(arg0);
        this.age.write(arg0);
        this.profession.write(arg0);
        this.lastName.write(arg0);
        this.firstName.write(arg0);
    }

    /**
     * Orders records by customer id first and by age second.
     *
     * @param o the record to compare against
     * @return a negative, zero, or positive value as this record sorts before,
     *         equal to, or after {@code o}
     */
    @Override
    public int compareTo(CustValue o) {
        int comp = this.custId.customerId.compareTo(o.custId.customerId);
        if (comp != 0) {
            return comp;
        }
        // NOTE(review): age is a Text, so this is presumably a textual rather
        // than numeric comparison (e.g. "9" vs "10") -- confirm this is intended.
        return this.age.compareTo(o.age);
    }

    /**
     * Returns the record as tab-separated fields in the order customer id,
     * first name, last name, age, profession, so that text-based output shows
     * the field values rather than the default object identity string.
     *
     * @return the tab-separated field values
     */
    @Override
    public String toString() {
        return custId.customerId + "\t" + firstName + "\t" + lastName + "\t"
                + age + "\t" + profession;
    }

}

在这里,如果您不想按键排序,而只想按值排序,那么只需让自定义值类实现WritableComparable接口;又因为您希望根据值来对键排序,所以键类必须实现Writable接口。
如果还有疑问,请回复我。谢谢:)

相关问题