我发现调试和测试MapReduce项目很有挑战性。
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.log4j.Logger;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;
import writables.Friend;
import writables.FriendArray;
import writables.FriendPair;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.StringTokenizer;
public class FacebookFriendsMapper extends Mapper<LongWritable, Text, FriendPair, FriendArray> {

    // One logger per class, not one per map() invocation.
    private static final Logger LOG = Logger.getLogger(FacebookFriendsMapper.class);

    /**
     * Expects one tab-separated line per record: {@code <person JSON>\t<friends JSON array>}.
     * Emits one (FriendPair, FriendArray) pair per friend of the person.
     */
    @Override
    public void map(LongWritable key, Text value, Context context)
    throws IOException, InterruptedException {
        StringTokenizer st = new StringTokenizer(value.toString(), "\t");
        if (st.countTokens() < 2) {
            // Guard: the original called nextToken() twice unconditionally and
            // would throw NoSuchElementException on a malformed line.
            LOG.warn("Skipping malformed input line: " + value);
            return;
        }
        String person = st.nextToken();
        String friends = st.nextToken();

        Friend f1 = populateFriend(person);
        if (f1 == null) {
            // populateFriend returns null on a parse failure; writing a pair
            // containing null would fail later during serialization.
            LOG.warn("Skipping line with unparseable person JSON: " + person);
            return;
        }

        List<Friend> friendList = populateFriendList(friends);
        // Single typed conversion instead of calling toArray() twice and
        // copying the result with Arrays.copyOf.
        Friend[] friendArray = friendList.toArray(new Friend[0]);
        FriendArray farray = new FriendArray(Friend.class, friendArray);

        for (Friend f2 : friendList) {
            FriendPair fpair = new FriendPair(f1, f2);
            context.write(fpair, farray);
            LOG.info(fpair + "......" + farray);
        }
    }

    /**
     * Builds a Friend from an already-parsed JSON object with
     * "id" (number), "name" and "hometown" (strings) fields.
     */
    private static Friend toFriend(JSONObject jsonObject) {
        Long lid = (Long) jsonObject.get("id");
        IntWritable id = new IntWritable(lid.intValue());
        Text name = new Text((String) jsonObject.get("name"));
        Text hometown = new Text((String) jsonObject.get("hometown"));
        return new Friend(id, name, hometown);
    }

    /**
     * Parses a single-person JSON string.
     *
     * @return the Friend, or {@code null} if the JSON cannot be parsed —
     *         callers must check for null.
     */
    private Friend populateFriend(String friendJson) {
        try {
            return toFriend((JSONObject) new JSONParser().parse(friendJson));
        } catch (ParseException e) {
            // Log with the offending input instead of printStackTrace().
            LOG.error("Failed to parse friend JSON: " + friendJson, e);
            return null;
        }
    }

    /**
     * Parses a JSON array of friends.
     *
     * @return the parsed friends; empty (never null) if parsing fails.
     */
    private List<Friend> populateFriendList(String friendsJson) {
        List<Friend> friendList = new ArrayList<>();
        try {
            JSONArray jsonarray = (JSONArray) new JSONParser().parse(friendsJson);
            for (Object jobj : jsonarray) {
                friendList.add(toFriend((JSONObject) jobj));
            }
        } catch (ParseException e) {
            LOG.error("Failed to parse friends JSON array: " + friendsJson, e);
        }
        return friendList;
    }
}
对于调试和测试,我通常会把上面的脚本放入另一个测试类的 public static void main(String[] args)
中,然后在 IntelliJ IDEA 的调试模式下运行,并从本地文件系统读取样本数据。因此,我非常肯定 Mapper 的逻辑是正确的。
关于 Reducer 脚本,我不清楚 Mapper 如何将其输出传递给 Reducer 的细节。我在研究期间查阅了一些示例 Reducer 脚本,并写出了我的 Reducer 的初始版本,如下所示:
/**
 * Reducer stub: receives one FriendPair key together with an Iterable of all
 * FriendArray values the mappers emitted for that key (the framework groups
 * map output by key, like a GROUP BY). The reduce body is intentionally
 * empty — the aggregation logic has not been written yet.
 */
public class FacebookFriendsReducer extends
Reducer<FriendPair, FriendArray, FriendPair, FriendArray> {
@Override
public void reduce(FriendPair key, Iterable<FriendArray> values, Context context)
throws IOException, InterruptedException {
// TODO: iterate over `values` and context.write(...) the aggregated result.
// NOTE(review): output types FriendPair/FriendArray may need revisiting once
// the intended reducer output is decided.
}
}
这是我无法继续的地方,因为我无法模拟 Mapper 如何将其输出传递到 FacebookFriendsReducer
和reduce
方法。我目前的调试方法是在public static void main(String[] args)
中编写reducer逻辑,然后在进程中以调试模式运行它,然后将其放入reducer类。
有人能告诉我如何把 Mapper 的正确输出传递到 reduce 方法中,以便我可以继续编写后续逻辑吗?如果您有更好的替代方法,可以在打包成 jar 文件并发送到 Hadoop 集群之前,在本地 Windows 机器上调试和测试 MapReduce,也请告诉我。
编辑@OneCricketeer的答案:
您可以检查驱动程序(主类),如下所示:
/**
 * Job driver for the facebook-friends MapReduce job. Run via ToolRunner so
 * generic Hadoop options (-D, -conf, ...) are parsed before {@link #run}.
 */
public class FacebookFriendsDriver extends Configured implements Tool {

    // Fallback paths for IDE debugging when no CLI arguments are supplied.
    private static final String DEFAULT_INPUT_PATH = "E:\\sampleInputPath\\inputFile";
    private static final String DEFAULT_OUTPUT_PATH = "E:\\sampleOutputPath\\outputFile";

    @Override
    public int run(String[] args) throws Exception {
        // Prefer explicit <input> <output> arguments; the hard-coded paths
        // remain only as a local-debugging convenience instead of being
        // permanently commented out.
        String inputPath = (args.length == 2) ? args[0] : DEFAULT_INPUT_PATH;
        String outputPath = (args.length == 2) ? args[1] : DEFAULT_OUTPUT_PATH;

        // Job setup
        Job fb = Job.getInstance(getConf(), "facebook-friends");
        fb.setJarByClass(FacebookFriendsDriver.class);

        // File input and output formats
        FileInputFormat.addInputPath(fb, new Path(inputPath));
        FileOutputFormat.setOutputPath(fb, new Path(outputPath));
        fb.setInputFormatClass(TextInputFormat.class);
        fb.setOutputFormatClass(SequenceFileOutputFormat.class);

        // Mapper/Reducer wiring and intermediate (map output) types
        fb.setMapperClass(FacebookFriendsMapper.class);
        fb.setReducerClass(FacebookFriendsReducer.class);
        fb.setMapOutputKeyClass(FriendPair.class);
        fb.setMapOutputValueClass(FriendArray.class);

        // Final output key and value types
        fb.setOutputKeyClass(FriendPair.class);
        fb.setOutputValueClass(FriendArray.class);

        // Submit the job and block until completion; 0 on success.
        return fb.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        int exitCode = ToolRunner.run(new FacebookFriendsDriver(), args);
        System.exit(exitCode);
    }
}
上面的示例驱动程序类是我基于系统中现有的其他MapReduce作业创建的。但我无法使其在我的本地窗口计算机上工作,错误如下:
Connected to the target VM, address: '127.0.0.1:59143', transport: 'socket'
23/01/10 10:52:22 ERROR util.Shell: Failed to locate the winutils binary in the hadoop binary path
java.io.IOException: Could not locate executable null\bin\winutils.exe in the Hadoop binaries.
at org.apache.hadoop.util.Shell.getQualifiedBinPath(Shell.java:324)
at org.apache.hadoop.util.Shell.getWinUtilsPath(Shell.java:339)
at org.apache.hadoop.util.Shell.<clinit>(Shell.java:332)
at org.apache.hadoop.util.GenericOptionsParser.preProcessForWindows(GenericOptionsParser.java:431)
at org.apache.hadoop.util.GenericOptionsParser.parseGeneralOptions(GenericOptionsParser.java:477)
at org.apache.hadoop.util.GenericOptionsParser.<init>(GenericOptionsParser.java:171)
at org.apache.hadoop.util.GenericOptionsParser.<init>(GenericOptionsParser.java:154)
at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:64)
at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:84)
at FacebookFriendsDriver.main(FacebookFriendsDriver.java:60)
WARNING: An illegal reflective access operation has occurred
WARNING: Illegal reflective access by org.apache.hadoop.security.authentication.util.KerberosUtil (file:/C:/Users/Holyken/.m2/repository/org/apache/hadoop/hadoop-auth/2.3.0-cdh5.1.0/hadoop-auth-2.3.0-cdh5.1.0.jar) to method sun.security.krb5.Config.getInstance()
WARNING: Please consider reporting this to the maintainers of org.apache.hadoop.security.authentication.util.KerberosUtil
WARNING: Use --illegal-access=warn to enable warnings of further illegal reflective access operations
WARNING: All illegal access operations will be denied in a future release
23/01/10 10:52:23 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
23/01/10 10:52:23 INFO Configuration.deprecation: session.id is deprecated. Instead, use dfs.metrics.session-id
23/01/10 10:52:23 INFO jvm.JvmMetrics: Initializing JVM Metrics with processName=JobTracker, sessionId=
Exception in thread "main" java.lang.NullPointerException
at java.base/java.lang.ProcessBuilder.start(ProcessBuilder.java:1090)
at java.base/java.lang.ProcessBuilder.start(ProcessBuilder.java:1071)
at org.apache.hadoop.util.Shell.runCommand(Shell.java:451)
at org.apache.hadoop.util.Shell.run(Shell.java:424)
at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:656)
at org.apache.hadoop.util.Shell.execCommand(Shell.java:745)
at org.apache.hadoop.util.Shell.execCommand(Shell.java:728)
at org.apache.hadoop.fs.RawLocalFileSystem.setPermission(RawLocalFileSystem.java:633)
at org.apache.hadoop.fs.RawLocalFileSystem.mkdirs(RawLocalFileSystem.java:421)
at org.apache.hadoop.fs.FilterFileSystem.mkdirs(FilterFileSystem.java:281)
at org.apache.hadoop.mapreduce.JobSubmissionFiles.getStagingDir(JobSubmissionFiles.java:126)
at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:982)
at org.apache.hadoop.mapred.JobClient$2.run(JobClient.java:976)
at java.base/java.security.AccessController.doPrivileged(Native Method)
at java.base/javax.security.auth.Subject.doAs(Subject.java:423)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1554)
at org.apache.hadoop.mapred.JobClient.submitJobInternal(JobClient.java:976)
at org.apache.hadoop.mapreduce.Job.submit(Job.java:582)
at org.apache.hadoop.mapreduce.Job.waitForCompletion(Job.java:612)
at FacebookFriendsDriver.run(FacebookFriendsDriver.java:54)
at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:70)
at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:84)
at FacebookFriendsDriver.main(FacebookFriendsDriver.java:60)
Disconnected from the target VM, address: '127.0.0.1:59143', transport: 'socket'
您能否详细说明如何在本地文件系统上运行MapReduce作业?
1条答案
按热度按时间osh3o9ms1#
你可以在 IDE 中设置断点,甚至不需要真实的 Hadoop 集群——代码在本地文件系统上也可以运行。
另外,你也可以写单元测试。例如,你的 JSON 解析函数看起来在异常时会返回 null,而你随后会把这个 null 写入 Mapper 的输出中……另外,你也不需要仅仅为了创建一个数组而先把 List 转换成数组再拷贝一次。
作业驱动程序应用程序的mapreduce的主要方法是在调试器中启动的方法。
我无法模拟 Mapper 如何将其输出传递到 FacebookFriendsReducer
参数的给出类似于
GROUP BY key
操作:你的 values 是 FriendArray 的可迭代对象,所以你需要循环遍历它们。目前还不清楚 Reducer 需要输出什么,因此输出类型可能不正确。