java—在hadoop中执行一个简单的mapreduce函数来搜索日志文件中的字符串

gt0wga4j  于 2021-05-29  发布在  Hadoop
关注(0)|答案(1)|浏览(259)

当我在eclipse中使用本地文件系统中的输入文件执行它时,mapreduce工作正常。但是当我把输入文件放入hdfs、在hortonworks沙盒中执行jar文件时,stringKey变量没有被设置,即stringKey在mapper中为null,尽管我已经在main函数中对它进行了实例化(赋值),并且在那里可以正常访问。我的代码有错误吗?

import java.io.IOException;
    import java.util.Iterator;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapred.FileInputFormat;
    import org.apache.hadoop.mapred.FileOutputFormat;
    import org.apache.hadoop.mapred.JobClient;
    import org.apache.hadoop.mapred.JobConf;
    import org.apache.hadoop.mapred.MapReduceBase;
    import org.apache.hadoop.mapred.Mapper;
    import org.apache.hadoop.mapred.OutputCollector;
    import org.apache.hadoop.mapred.Reducer;
    import org.apache.hadoop.mapred.Reporter;
    import org.apache.hadoop.mapred.TextInputFormat;
    import org.apache.hadoop.mapred.TextOutputFormat;

    public class StringSearch {
        static String stringKey;
        public static class Map extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable> {
            private final static IntWritable one = new IntWritable(1);
            private Text word = new Text();

            public void map(LongWritable key, Text value,
                    OutputCollector<Text, IntWritable> output, Reporter reporter)
                            throws IOException {
                String line = value.toString();
                System.out.println(StringSearch.stringKey);
                if(StringSearch.stringKey != null)
                {
                    if(line.contains(StringSearch.stringKey))
                    {
                        word.set(line);
                        output.collect(word, one);
                    }
                }
            }

        }
        public static class Reduce extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable> {
            public void reduce(Text key, Iterator<IntWritable> values,
                    OutputCollector<Text, IntWritable> output, Reporter reporter)
                            throws IOException {
                int sum = 0;
                //Iterate through all the values with respect to a key and
                //sum up all of them
                while (values.hasNext()) {
                    sum += values.next().get();
                }
                //Push to the output collector the Key and the obtained
                //sum as value
                output.collect(key, new IntWritable(sum));

            }
        }
        public static class Main {
            public static void main(String[] args) throws Exception {
                if(args.length > 2)
                {
                    stringKey = args[2];
                    System.out.println(stringKey);
                }

                //creating a JobConf object and assigning a job name for identification purposes
                JobConf conf = new JobConf(StringSearch.class);
                conf.setJobName("StringSearch");
                //Setting configuration object with the Data Type of output Key and Value for //map and reduce if you have diffrent type of outputs there is other set method //for them
                conf.setOutputKeyClass(Text.class);
                conf.setOutputValueClass(IntWritable.class);
                conf.setMapperClass(Map.class);
                conf.setCombinerClass(Reduce.class); //set theCombiner class
                conf.setReducerClass(Reduce.class);
                conf.setInputFormat(TextInputFormat.class);
                conf.setOutputFormat(TextOutputFormat.class);
                //the hdfs input and output directory to be fetched from the command line
                FileInputFormat.setInputPaths(conf, new Path(args[0]));
                FileOutputFormat.setOutputPath(conf, new Path(args[1]));
                //submits the job to MapReduce. and returns only after the job has completed
                JobClient.runJob(conf);
            }

        }

    }
ybzsozfc

ybzsozfc1#

您在 main 中赋值的静态变量只存在于提交作业的驱动程序 JVM 中,而 mapper/reducer 在集群上各自独立的 JVM 里运行,因此在那里读不到这个值,这样做是行不通的。不要使用 stringKey = args[2]; ,而应在驱动程序中调用 conf.set("stringkey", args[2]) 。然后在 mapper/reducer 中重写 configure(JobConf conf) 方法,并通过 conf.get("stringkey") 取回该值。

相关问题