使用hadoop map reduce比较两个文本文件

l0oc07j2  于 2021-05-30  发布在  Hadoop
关注(0)|答案(1)|浏览(437)

我想逐行比较两个文本文件,看它们是否相等。如何使用hadoop map reduce编程?

// Counter shared by every call to map() on this class; bumped once per call.
static int i=0;
/**
 * Emits the text of each input record as the output key, paired with the
 * current value of the running call counter.
 *
 * @param key      input key; not read by this method
 * @param value    the input record
 * @param output   collector that receives the (line, counter) pair
 * @param reporter not used by this method
 * @throws IOException declared by the signature; presumably raised by
 *                     output.collect — confirm against the collector in use
 */
public void map(LongWritable key, String value, OutputCollector<String,IntWritable> output, Reporter reporter) throws IOException {
      String line = value.toString();
     i++; // running call count, intended to serve as the line number
     // NOTE(review): the counter is static, so it only counts calls made in
     // this JVM; presumably it will not match the file's real line numbers
     // when the input is split across several map tasks — confirm.
        output.collect(line, new IntWritable(i));
 }

我试着把每一行映射(map)到它的行号。但是接下来该如何进行 reduce,并把它与另一个文件进行比较呢?

c7rzv4ha

c7rzv4ha1#

在 MapReduce 编程中,比较两个文本文件相当于对这两个文件做连接(join)。要连接两个文本文件,需要使用两个输出相同键的 Mapper。在您的情况下,可以把行的偏移量作为键,把行的内容作为值。MultipleInputs 类用于为多个输入文件分别指定各自的 Mapper。
下面是使用 Java 在 MapReduce 中比较两个文本文件的完整程序。
程序的参数依次是文件1、文件2和输出目录。

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * MapReduce job that compares two text files and reports, for every line
 * position, whether the two files hold the same text.
 *
 * <p>Both inputs are read with {@code TextInputFormat}, which supplies the
 * byte offset of each line as the key. Lines are therefore paired by byte
 * offset, not by line number: once the files differ in line length, later
 * lines no longer share an offset and are reported as missing on one side.
 *
 * <p>Usage: {@code CompareTwoFiles <file1> <file2> <output dir>}
 */
public class CompareTwoFiles {

    /** Prefix added to every value read from the first input path. */
    private static final String FIRST_TAG = "1";

    /** Prefix added to every value read from the second input path. */
    private static final String SECOND_TAG = "2";

    /** Text between the two lines in a "different" report. */
    private static final String SEPARATOR = "     vs    ";

    /** Printed in place of a line that one of the files does not have. */
    private static final String MISSING = "<missing>";

    /**
     * File system the job talks to. Environment specific; adjust for the
     * target cluster. (Newer Hadoop releases call this property
     * {@code fs.defaultFS}; the older name is kept for compatibility.)
     */
    private static final String DEFAULT_FS = "hdfs://localhost:8020";

    /**
     * Mapper for the first file: forwards each line under its byte offset,
     * tagged so the reducer can tell which file it came from.
     */
    public static class Map extends
            Mapper<LongWritable, Text, LongWritable, Text> {

        /** Reused output holder to avoid allocating a Text per record. */
        private final Text tagged = new Text();

        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            tagged.set(FIRST_TAG + value.toString());
            context.write(key, tagged);
        }
    }

    /**
     * Mapper for the second file: same as {@link Map} but with the
     * second-file tag.
     */
    public static class Map2 extends
            Mapper<LongWritable, Text, LongWritable, Text> {

        /** Reused output holder to avoid allocating a Text per record. */
        private final Text tagged = new Text();

        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            tagged.set(SECOND_TAG + value.toString());
            context.write(key, tagged);
        }
    }

    /**
     * Receives the lines that share one byte offset and writes either
     * {@code same} or {@code <file1 line>     vs    <file2 line>}.
     */
    public static class Reduce extends
            Reducer<LongWritable, Text, LongWritable, Text> {

        /**
         * Compares the line from each file at the given offset.
         *
         * @param key     byte offset shared by the incoming lines
         * @param values  tagged lines emitted by {@link Map} and {@link Map2}
         * @param context sink for the comparison result
         * @throws IOException if a value carries no known tag, or if one
         *                     input contributes more than one line for the
         *                     same offset (e.g. an input path that is a
         *                     directory of several files)
         */
        @Override
        public void reduce(LongWritable key, Iterable<Text> values,
                Context context) throws IOException, InterruptedException {
            String first = null;
            String second = null;

            // Values arrive in no guaranteed order, so the tag (not the
            // position in the iteration) decides which file a line is from.
            for (Text text : values) {
                String tagged = text.toString();
                if (tagged.startsWith(FIRST_TAG)) {
                    if (first != null) {
                        throw duplicate("first", key);
                    }
                    first = tagged.substring(FIRST_TAG.length());
                } else if (tagged.startsWith(SECOND_TAG)) {
                    if (second != null) {
                        throw duplicate("second", key);
                    }
                    second = tagged.substring(SECOND_TAG.length());
                } else {
                    throw new IOException(
                            "Untagged value at offset " + key.get());
                }
            }

            if (first != null && first.equals(second)) {
                context.write(key, new Text("same"));
            } else {
                // One side may be absent when the files differ in length
                // or their offsets have drifted apart.
                context.write(key, new Text(
                        orMissing(first) + SEPARATOR + orMissing(second)));
            }
        }

        /** Returns the line, or the missing-line marker when it is absent. */
        private static String orMissing(String line) {
            return line == null ? MISSING : line;
        }

        /** Builds the error raised when one input yields two lines per offset. */
        private static IOException duplicate(String which, LongWritable key) {
            return new IOException("More than one line from the " + which
                    + " input at offset " + key.get()
                    + "; each input must be a single file");
        }
    }

    /**
     * Configures and runs the comparison job, then exits with status 0 on
     * success, 1 if the job failed, or 2 if too few arguments were given.
     *
     * @param args {@code <file1> <file2> <output dir>}
     * @throws Exception if the job cannot be set up or submitted
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 3) {
            System.err.println(
                    "Usage: CompareTwoFiles <file1> <file2> <output dir>");
            System.exit(2);
        }

        Configuration conf = new Configuration();
        conf.set("fs.default.name", DEFAULT_FS);

        Job job = Job.getInstance(conf,
                "Compare Two Files and Identify the Difference");
        job.setJarByClass(CompareTwoFiles.class);
        job.setReducerClass(Reduce.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        MultipleInputs.addInputPath(job, new Path(args[0]),
                TextInputFormat.class, Map.class);
        MultipleInputs.addInputPath(job, new Path(args[1]),
                TextInputFormat.class, Map2.class);
        FileOutputFormat.setOutputPath(job, new Path(args[2]));

        // Propagate job failure to the shell instead of always exiting 0.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

}

相关问题