无法从mapreduce作业中获得我想要的结果

ckocjqey  于 2021-06-02  发布在  Hadoop
关注(0)|答案(2)|浏览(271)

这是我的数据样本

假设第一列的索引为 0,我想用 MapReduce 从该文件中计算每个商店的总销售额;其中商店名称位于索引 2,收入位于索引 4。
这是我的Map代码

/**
 * Emits one (store name, revenue) pair for each well-formed input line.
 *
 * <p>The line is split on tab characters. Only lines with exactly six
 * columns are used: column 2 supplies the store name and column 4 the
 * revenue, which is parsed as an integer. Any other line is ignored.
 *
 * @param key     the input key supplied by the framework (not used here)
 * @param value   one line of the input file
 * @param context sink for the (store name, revenue) output pair
 * @throws IOException          propagated from {@code context.write}
 * @throws InterruptedException propagated from {@code context.write}
 */
public void map(LongWritable key , Text value , Context context)
throws IOException , InterruptedException
{
    final String[] fields = value.toString().split("\t");

    // Guard clause: anything that is not a six-column record is dropped.
    if (fields.length != 6) {
        return;
    }

    final Text store = new Text(fields[2]);
    final IntWritable revenue = new IntWritable(Integer.parseInt(fields[4]));
    context.write(store, revenue);
}

这是我的 Reduce 代码

/**
 * Adds up every revenue value grouped under one store and emits a single
 * (store name, total) pair.
 *
 * @param key     the store name the values were grouped under
 * @param values  the revenue amounts for that store
 * @param context sink for the (store name, total) output pair
 * @throws IOException          propagated from {@code context.write}
 * @throws InterruptedException propagated from {@code context.write}
 */
public void reduce(Text key, Iterable<IntWritable> values, Context context)
        throws IOException , InterruptedException {

    int storeSales = 0;

    // Consume the values through a single iterator (for-each) instead of
    // asking the Iterable for an iterator on every step: an Iterable is free
    // to hand out a fresh iterator per call, in which case a
    // "while (values.iterator().hasNext())" loop would never advance.
    for (IntWritable sale : values) {
        // NOTE(review): the running total is an int and wraps around silently
        // past Integer.MAX_VALUE; confirm the data cannot reach that range.
        storeSales += sale.get();
    }

    context.write(key, new IntWritable(storeSales));
}

这是运行作业的代码

public class StoreSales extends Configured implements Tool {

public static void main(String[] args) throws Exception {
    // this main function will call run method defined above.
    int res = ToolRunner.run(new StoreSales(),args);
    System.exit(res);
}

@Override
public int run(String[] args) throws Exception {
    // TODO Auto-generated method stub
    JobConf conf = new JobConf();

    @SuppressWarnings("unused")
    Job job = new Job(conf , "Sales Per Store");

    job.setMapperClass(StoreSalesMapper.class);
    job.setReducerClass(StoreSalesReducer.class);
    job.setJarByClass(StoreSales.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    Path input = new Path(args[0]);
    Path output = new Path(args[1]);

    FileInputFormat.addInputPath(conf , input);
    FileOutputFormat.setOutputPath(conf, output);

    JobClient.runJob(conf);

    return 0;
    }
 }

这是一个如何得到结果的示例

这就是我得到的结果

我做错什么了?

vltsax25

vltsax251#

我相信我已经找到了问题所在。调用 line.split 方法时,您对制表符的转义不正确。这是因为 String.split 方法会把参数解释为正则表达式;在正则表达式中,表示制表符的正确写法是 \\t,而您使用的是 \t。反斜杠本身也必须转义,也就是说,您少写了一个 \ 字符。(注:在 Java 中,"\t" 是制表符字面量,作为正则表达式同样能匹配制表符,因此这两种写法的分割结果相同。)
修正后的 split 语句:

String[] columns = line.split("\\t");
mlmc2os5

mlmc2os52#

您的逻辑没有问题。我沿用了您的逻辑,只修改了驱动程序部分,改用新的 MapReduce API:
Mapper 部分
import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class Map extends Mapper<LongWritable,Text,Text,IntWritable>{

    public void map(LongWritable key , Text value , Context context)
            throws IOException , InterruptedException
            {
                String line = value.toString();
                String[] columns = line.split("\\t");

                if(columns.length == 6)
                {
                    String storeNameString = columns[2];
                    Text storeName = new Text(storeNameString);

                    String storeRevenueString = columns[4];
                    IntWritable storeRevenue = new IntWritable(Integer.parseInt(storeRevenueString));
                    context.write(storeName, storeRevenue);
                }   
            }
}

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class Reduce extends Reducer<Text,IntWritable,Text,IntWritable>{

    public void reduce(Text key, Iterable<IntWritable> values, Context context)
            throws IOException , InterruptedException {

        Text storeName = key;
        int storeSales = 0;

        while(values.iterator().hasNext())
        {
            storeSales += values.iterator().next().get();

        }
        context.write(storeName, new IntWritable(storeSales));
    }

}

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Command-line driver that configures and runs the "Sales Per Store" job.
 *
 * <p>Usage: {@code Driver <input path> <output path>}
 */
public class Driver {

    /**
     * Configures the job with {@link Map} and {@link Reduce}, submits it,
     * waits for it to finish, and exits with a status reflecting the outcome:
     * 0 on success, 1 if the job failed, 2 if the arguments are missing.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @throws Exception propagated from job setup or submission
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 2) {
            System.err.println("Usage: Driver <input path> <output path>");
            System.exit(2);
        }

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "Sales Per Store");

        job.setJarByClass(Driver.class);
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Turn the job result into the process exit code so that a failed job
        // is visible to whatever launched this driver.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

示例输入文件(实际文件中各字段以制表符分隔):
2012-01-01 09.00 sanjose clothin 214 amex
2012-01-01 09.00 seattle music 320 master
2012-01-01 09.00 seattle elec 3120 master
2012-01-01 09.00 sanjose perfume 3200 amex
输出文件:
cat test123/part-r-00000
sanjose 3414
seattle 3440

相关问题