Hadoop MapReduce: generating substrings of different lengths

5cnsuln7 · posted 2021-05-29 in Hadoop

I am writing Hadoop MapReduce code to generate substrings of different lengths. As an example, given the string "ZYXCBA" and the length 3 (supplied via a text file whose contents are "3 ZYXCBA"), my code must return all possible substrings of length 3 ("ZYX", "YXC", "XCB", "CBA"), of length 4 ("ZYXC", "YXCB", "XCBA"), and finally of length 5 ("ZYXCB", "YXCBA").
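For reference, the enumeration itself is simple outside MapReduce; here is a minimal plain-Java sketch (my own illustration, the class name SubstringDemo is hypothetical):

public class SubstringDemo {
    public static void main(String[] args) {
        String s = "ZYXCBA";
        int minLen = 3;
        // for each length from minLen up to s.length() - 1,
        // print every contiguous substring of that length
        for (int len = minLen; len < s.length(); len++) {
            StringBuilder line = new StringBuilder(len + " ");
            for (int start = 0; start + len <= s.length(); start++) {
                line.append(s.substring(start, start + len)).append(' ');
            }
            System.out.println(line.toString().trim());
        }
    }
}

This prints 3 ZYX YXC XCB CBA, then 4 ZYXC YXCB XCBA, then 5 ZYXCB YXCBA, matching the expected result above.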
In the map phase I do the following:
key = the length of the substring I want
value = "ZYXCBA"
So the mapper output is:

3,"ZYXCBA"
4,"ZYXCBA"
5,"ZYXCBA"

In the reduce phase, I use the string ("ZYXCBA") together with the key 3 to get all substrings of length 3, and likewise for 4 and 5, joining the results with string concatenation. The reduce output should therefore be:

3 "ZYX YXC XCB CBA"
4 "ZYXC YXCB XCBA"
5 "ZYXCB YXCBA"

I am running the code with the following command:

hduser@Ganesh:~/Documents$ hadoop jar Saishingles.jar hadoopshingles.Saishingles Behara/Shingles/input Behara/Shingles/output
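For completeness, preparing and uploading the one-line input file might look like this (a sketch; the local file name input.txt is my assumption, while the HDFS path is taken from the command above):

echo "3 ZYXCBA" > input.txt
hadoop fs -mkdir -p Behara/Shingles/input
hadoop fs -put input.txt Behara/Shingles/input/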

My code is as follows:

package hadoopshingles;

import java.io.IOException;
//import java.util.ArrayList;

import org.apache.hadoop.fs.Path; 
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class Saishingles{

public static class shinglesmapper extends Mapper<Object, Text, IntWritable, Text>{

        public void map(Object key, Text value, Context context
                ) throws IOException, InterruptedException {

            String str = new String(value.toString());
            String[] list = str.split(" ");
            int x = Integer.parseInt(list[0]);
            String val = list[1];
            int M = val.length();
            int X = M-1;

            for(int z = x; z <= X; z++)
            {
                context.write(new IntWritable(z), new Text(val));
            }

        }

     }

public static class shinglesreducer extends Reducer<IntWritable,Text,IntWritable,Text> {

    public void reduce(IntWritable key, Text value, Context context
            ) throws IOException, InterruptedException {
        int z = key.get();
        String str = new String(value.toString());
        int M = str.length();
        int Tz = M - z;
        String newvalue = "";
        for(int position = 0; position <= Tz; position++)
        {
            newvalue = newvalue + " " + str.substring(position,position + z);   
        }

        context.write(new IntWritable(z),new Text(newvalue));
    }
}

public static void main(String[] args) throws Exception {
      GenericOptionsParser parser = new GenericOptionsParser(args);
      Configuration conf = parser.getConfiguration();
      String[] otherArgs = parser.getRemainingArgs();

        if (otherArgs.length != 2) 
        {
          System.err.println("Usage: Saishingles <inputFile> <outputDir>");
          System.exit(2);
        }
      Job job = Job.getInstance(conf, "Saishingles");
      job.setJarByClass(hadoopshingles.Saishingles.class);
      job.setMapperClass(shinglesmapper.class);
      //job.setCombinerClass(shinglesreducer.class);
      job.setReducerClass(shinglesreducer.class);
      //job.setMapOutputKeyClass(IntWritable.class);
      //job.setMapOutputValueClass(Text.class);
      job.setOutputKeyClass(IntWritable.class);
      job.setOutputValueClass(Text.class);
      FileInputFormat.addInputPath(job, new Path(args[0]));
      FileOutputFormat.setOutputPath(job, new Path(args[1]));
      System.exit(job.waitForCompletion(true) ? 0 : 1);

}

}

However, instead of returning the reduce output

3 "ZYX YXC XCB CBA"
4 "ZYXC YXCB XCBA"
5 "ZYXCB YXCBA"

it returns

3 "ZYXCBA"
4 "ZYXCBA"
5 "ZYXCBA"

i.e., its output is identical to the mapper output. I have no idea why this is happening. Please help me fix this, and thanks in advance for any help ;) :) :)

nhjlsmyf (answer #1)

You could do this without even running a reducer; your map/reduce split is off, and the transformation can be done entirely in the mapper, as the sketch below shows.
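A minimal map-only sketch of that idea (my own illustration, not code from the original answer; the class names are mine): setting job.setNumReduceTasks(0) makes Hadoop write the mapper output directly as the job output, so the mapper can emit the joined substrings itself.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MapOnlySubstrings {

    public static class SubstringMapper extends Mapper<Object, Text, IntWritable, Text> {
        @Override
        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] parts = value.toString().split(" ");
            int minLen = Integer.parseInt(parts[0].trim());
            String str = parts[1].replaceAll("\"", "").trim();
            // one output record per substring length, values joined by spaces
            for (int len = minLen; len < str.length(); len++) {
                StringBuilder sb = new StringBuilder();
                for (int i = 0; i + len <= str.length(); i++) {
                    if (sb.length() > 0) sb.append(' ');
                    sb.append(str.substring(i, i + len));
                }
                context.write(new IntWritable(len), new Text(sb.toString()));
            }
        }
    }

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "map-only-substrings");
        job.setJarByClass(MapOnlySubstrings.class);
        job.setMapperClass(SubstringMapper.class);
        job.setNumReduceTasks(0); // no reduce phase: mapper output is the job output
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}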
As for why your current code misbehaves: per the Hadoop docs, in the reduce phase the reduce(WritableComparable, Iterator, OutputCollector, Reporter) method is called for each <key, (list of values)> pair in the grouped inputs.
In your reduce signature, public void reduce(IntWritable key, Text value, Context context) should be public void reduce(IntWritable key, Iterable<Text> values, Context context). Because your method does not match that signature, it never overrides Reducer's default reduce, which simply passes each mapper record through unchanged; that is why your output looks identical to the mapper output. Also, change the last line of the reduce method from context.write(new IntWritable(z), new Text(newvalue)); to context.write(key, new Text(newvalue)); since you already have the IntWritable key from the mapper, there is no need to create a new one.
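Applying those two changes to the reducer from the question gives roughly the following sketch (a drop-in replacement for the shinglesreducer class in the question's file, which already has the needed imports; I also switched the concatenation to a StringBuilder):

public static class shinglesreducer extends Reducer<IntWritable, Text, IntWritable, Text> {

    @Override
    public void reduce(IntWritable key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        int z = key.get();
        StringBuilder newvalue = new StringBuilder();
        for (Text value : values) {
            String str = value.toString();
            // one substring of length z starting at each position
            for (int position = 0; position + z <= str.length(); position++) {
                newvalue.append(' ').append(str.substring(position, position + z));
            }
        }
        // reuse the key from the mapper instead of wrapping a new IntWritable
        context.write(key, new Text(newvalue.toString()));
    }
}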
Given the input:

3 "ZYXCBA"
4 "ZYXCBA"
5 "ZYXCBA"

the job will output:

3   "XCB YXC ZYX"
4   "XCBA YXCB ZYXC"
5   "YXCBA ZYXCB"

The full MapReduce job:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SubStrings{

    public static class SubStringsMapper extends Mapper<Object, Text, IntWritable, Text> {

        @Override
        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {

            String [] values = value.toString().split(" ");
            int len = Integer.parseInt(values[0].trim());
            String str = values[1].replaceAll("\"", "").trim();

            // emit one substring of length len starting at each position
            for (int i = 0; i + len <= str.length(); i++) {
                context.write(new IntWritable(len), new Text(str.substring(i, i + len)));
            }

        }   
    }

    public  static class SubStringsReducer extends Reducer<IntWritable, Text, IntWritable, Text> {

        public void reduce(IntWritable key, Iterable<Text> values, Context context) 
                throws IOException, InterruptedException {

            String str="\""; //adding starting quotes
            for(Text value: values)
                str += " " + value;

            str = str.replace("\" ", "\"") + "\""; // drop the space after the opening quote, then append the closing quote
            context.write(key, new Text(str));
        }
    }

    public static void main(String[] args) throws Exception {

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "get-possible-strings-by-length");

        job.setJarByClass(SubStrings.class);
        job.setMapperClass(SubStringsMapper.class); 
        job.setReducerClass(SubStringsReducer.class);

        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(Text.class);

        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        FileSystem fs = null;
        Path dstFilePath = new Path(args[1]);
        try {
            fs = dstFilePath.getFileSystem(conf);
            if (fs.exists(dstFilePath))
                fs.delete(dstFilePath, true);
        } catch (IOException e1) {
            e1.printStackTrace();
        }

        job.waitForCompletion(true);
    } 
}
