这个程序是用cloudera编写的。这是我创建的驱动程序类。
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
/**
 * Driver class for the word-count MapReduce job.
 *
 * Usage: WordCount2 &lt;input path&gt; &lt;output path&gt;
 * Exits with 0 on success, 1 if the job fails, and -1 on missing arguments.
 */
public class WordCount2
{
public static void main(String[] args) throws Exception
{
// Both an input and an output path must be supplied on the command line.
if(args.length < 2)
{
System.out.println("Enter input and output path correctly ");
System.exit(-1);//exit if error occurs
}
Configuration conf = new Configuration();
// Job.getInstance replaces the deprecated new Job(conf, name) constructor,
// so the @SuppressWarnings("deprecation") annotation is no longer needed.
Job job = Job.getInstance(conf, "WordCount2");
job.setJarByClass(WordCount2.class); // locate the jar containing this driver class

// Input/output paths taken from the command line.
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));

// Plain-text input (offset -> line) and plain-text output.
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);

// Mapper and reducer implementations for this job.
job.setMapperClass(WordMapper.class);
job.setReducerClass(WordReducer.class);

// Final output key/value types: word -> count.
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);

// Submit the job, wait for it to finish, and propagate success/failure
// as the process exit code.
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
下面是mapper类的代码。
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Mapper;
/**
 * Mapper for the word-count job: for each input line, emits
 * (word, 1) for every whitespace-separated token.
 */
public class WordMapper extends Mapper<LongWritable, Text, Text, IntWritable>
{
// Hadoop serializes key/value objects on every context.write(), so the
// same writable instances can be reused instead of allocating two new
// objects per token (the standard Hadoop word-count idiom).
private static final IntWritable ONE = new IntWritable(1);
private final Text word = new Text();

/**
 * @param key     byte offset of the line within the input split (unused)
 * @param value   one line of input text
 * @param context used to emit (word, 1) pairs
 */
@Override
public void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException
{
StringTokenizer tokenizer = new StringTokenizer(value.toString());
while(tokenizer.hasMoreTokens())
{
word.set(tokenizer.nextToken());
context.write(word, ONE);
}
}
}
//
3条答案
按热度按时间czfnxgou1#
下面是命令行日志
下面是输入数据显示输入文件soni.txt:
在r-00000部分文件中收到以下输出:
但是,我认为这不应该是正确的输出。它应该给出确切的字数。
kulphzqa2#
你的 reduce 方法签名错误，因此它从未被调用。你需要正确重写（override）Reducer 类中的 reduce 方法：它的第二个参数类型是 Iterable，而不是 Iterator。
试试这个：
whlutmcx3#
减速器等级