我是hadoop的mapreduce的新手。我已经写了一个map reduce任务,我正在本地机器上运行它。但这项工作在Map100%完成后仍悬而未决。
下面是代码,我不明白我遗漏了什么。
我有一个自定义密钥类
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
public class AirlineMonthKey implements WritableComparable<AirlineMonthKey>{
Text airlineName;
Text month;
public AirlineMonthKey(){
super();
}
public AirlineMonthKey(Text airlineName, Text month) {
super();
this.airlineName = airlineName;
this.month = month;
}
public Text getAirlineName() {
return airlineName;
}
public void setAirlineName(Text airlineName) {
this.airlineName = airlineName;
}
public Text getMonth() {
return month;
}
public void setMonth(Text month) {
this.month = month;
}
@Override
public void readFields(DataInput in) throws IOException {
// TODO Auto-generated method stub
this.airlineName.readFields(in);
this.month.readFields(in);
}
@Override
public void write(DataOutput out) throws IOException {
// TODO Auto-generated method stub
this.airlineName.write(out);
this.month.write(out);
}
@Override
public int compareTo(AirlineMonthKey airlineMonthKey) {
// TODO Auto-generated method stub
int diff = getAirlineName().compareTo(airlineMonthKey.getAirlineName());
if(diff != 0){
return diff;
}
int m1 = Integer.parseInt(getMonth().toString());
int m2 = Integer.parseInt(airlineMonthKey.getMonth().toString());
if(m1>m2){
return -1;
}
else
return 1;
}
}
以及使用自定义键的mapper和reducer类,如下所示。
package com.mapresuce.secondarysort;
import java.io.IOException;
import java.io.StringReader;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import com.opencsv.CSVReader;
public class FlightDelayByMonth {
public static class FlightDelayByMonthMapper extends
Mapper<Object, Text, AirlineMonthKey, Text> {
public void map(Object key, Text value, Context context)
throws IOException, InterruptedException {
String str = value.toString();
// Reading Line one by one from the input CSV.
CSVReader reader = new CSVReader(new StringReader(str));
String[] split = reader.readNext();
reader.close();
String airlineName = split[6];
String month = split[2];
String year = split[0];
String delayMinutes = split[37];
String cancelled = split[41];
if (!(airlineName.equals("") || month.equals("") || delayMinutes
.equals(""))) {
if (year.equals("2008") && cancelled.equals("0.00")) {
AirlineMonthKey airlineMonthKey = new AirlineMonthKey(
new Text(airlineName), new Text(month));
Text delay = new Text(delayMinutes);
context.write(airlineMonthKey, delay);
System.out.println("1");
}
}
}
}
public static class FlightDelayByMonthReducer extends
Reducer<AirlineMonthKey, Text, Text, Text> {
public void reduce(AirlineMonthKey key, Iterable<Text> values,
Context context) throws IOException, InterruptedException {
for(Text val : values){
context.write(new Text(key.getAirlineName().toString()+" "+key.getMonth().toString()), val);
}
}
}
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
Configuration conf = new Configuration();
String[] otherArgs = new GenericOptionsParser(conf, args)
.getRemainingArgs();
if (otherArgs.length != 2) {
System.err.println("Usage:<in> <out>");
System.exit(2);
}
Job job = new Job(conf, "Average monthly flight dealy");
job.setJarByClass(FlightDelayByMonth.class);
job.setMapperClass(FlightDelayByMonthMapper.class);
job.setReducerClass(FlightDelayByMonthReducer.class);
job.setOutputKeyClass(AirlineMonthKey.class);
job.setOutputValueClass(Text.class);
FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
我还在main中创建了一个作业和配置。不知道我错过了什么。我是在当地环境下运行的。
2条答案
按热度按时间u7up0aaq1#
尝试在airlinemonthkey类中编写tostring、equals和hashcode的自定义实现。
阅读下面的链接。
http://hadoop.apache.org/docs/stable/api/org/apache/hadoop/io/writablecomparable.html
键类型实现hashcode()很重要。
希望这能帮到你。
2admgd592#
问题是我必须在airlinemonthkey中使用默认构造函数(我这样做了),并在customkey类中初始化示例变量(我没有)。