我有30行数据,正在尝试用MapReduce程序对其进行清理。数据本身清理得没有问题,但30行中只输出了一行。我猜是记录读取器(RecordReader)没有逐行读取。请帮我检查一下代码,告诉我问题出在哪里。我是Hadoop新手。
数据:-
1 Vlan154.DEL-ISP-COR-SWH-002.mantraonline.com (61.95.250.140) 0.460 ms 0.374 ms 0.351 ms
2 202.56.223.213 (202.56.223.213) 39.718 ms 39.511 ms 39.559 ms
3 202.56.223.17 (202.56.223.17) 39.714 ms 39.724 ms 39.628 ms
4 125.21.167.153 (125.21.167.153) 41.114 ms 40.001 ms 39.457 ms
5 203.208.190.65 (203.208.190.65) 120.340 ms 71.384 ms 71.346 ms
6 ge-0-1-0-0.sngtp-dr1.ix.singtel.com (203.208.149.158) 71.493 ms ge-0-1-2-0.sngtp-dr1.ix.singtel.com (203.208.149.210) 71.183 ms ge-0-1-0-0.sngtp-dr1.ix.singtel.com (203.208.149.158) 71.739 ms
7 ge-0-0-0-0.sngtp-ar3.ix.singtel.com (203.208.182.2) 80.917 ms ge-2-0-0-0.sngtp-ar3.ix.singtel.com (203.208.183.20) 71.550 ms ge-1-0-0-0.sngtp-ar3.ix.singtel.com (203.208.182.6) 71.534 ms
8 203.208.151.26 (203.208.151.26) 141.716 ms 203.208.145.190 (203.208.145.190) 134.740 ms 203.208.151.26 (203.208.151.26) 142.453 ms
9 219.158.3.225 (219.158.3.225) 138.774 ms 157.205 ms 157.123 ms
10 219.158.4.69 (219.158.4.69) 156.865 ms 157.044 ms 156.845 ms
11 202.96.12.62 (202.96.12.62) 157.109 ms 160.294 ms 159.805 ms
12 61.148.3.58 (61.148.3.58) 159.521 ms 178.088 ms 160.004 ms
MPLS Label=33 CoS=5 TTL=1 S=0
13 202.106.48.18 (202.106.48.18) 199.730 ms 181.263 ms 181.300 ms
14 * * *
15 * * *
16 * * *
17 * * *
18 * * *
19 * * *
20 * * *
21 * * *
22 * * *
23 * * *
MapReduce program:-
公共类跟踪数据清理{
/**
* @param args
* @throws IOException
* @throws InterruptedException
* @throws ClassNotFoundException
*/
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
Configuration conf = new Configuration();
String userArgs[] = new GenericOptionsParser(conf, args).getRemainingArgs();
if (userArgs.length < 2) {
System.out.println("Usage: hadoop jar jarfilename mainclass input output");
System.exit(1);
}
Job job = new Job(conf, "cleaning trace route data");
job.setJarByClass(TraceRouteDataCleaning.class);
job.setMapperClass(TraceRouteMapper.class);
job.setReducerClass(TraceRouteReducer.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
FileInputFormat.addInputPath(job, new Path(userArgs[0]));
FileOutputFormat.setOutputPath(job, new Path(userArgs[1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
public static class TraceRouteMapper extends Mapper<LongWritable, Text, Text, Text>{
StringBuilder emitValue = null;
StringBuilder emitKey = null;
Text kword = new Text();
Text vword = new Text();
public void map(LongWritable key, Text value, Context context) throws InterruptedException, IOException
{
// String[] cleanData;
String lines = value.toString();
//deleting ms in RTT time data
lines = lines.replace(" ms", "");
String[] data = lines.split(" ");
emitValue = new StringBuilder(1024);
emitKey = new StringBuilder(1024);
if (data.length == 6) {
emitKey.append(data[0]);
emitValue.append(data[1]).append("\t").append(data[2]).append("\t").append(data[3]).append("\t").append(data[4]).append("\t").append(data[5]);
kword.set(emitKey.toString());
vword.set(emitValue.toString());
context.write(kword, vword);
}
}
}
public static class TraceRouteReducer extends Reducer<Text, Text, Text, Text>{
Text vword = new Text();
public void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException{
context.write(key,vword);
}
}
}
1条答案
按热度按时间z18hc3ub1#
首先,你的Reducer类应该根据你的需求写成下面两种形式之一。如果同一个键(key)不会对应多个Text值,就选择第一个Reducer,否则选择第二个。