I am trying to migrate Java code from Hadoop 0.20.2 to Hadoop 2.4.1, using the MapReduce API. When I try to run a job, I get the following error: java.io.IOException: Cannot initialize Cluster. Please check your configuration for mapreduce.framework.name and the correspond server addresses.
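For reference, mapreduce.framework.name is the property the exception points at. A minimal sketch of setting it explicitly (assuming I want the job to keep running in-process, as it did under 0.20.2; the class and helper here are my own illustration, not from my actual project) would be:

import org.apache.hadoop.conf.Configuration;

public class LocalFrameworkSketch {
    public static Configuration localConf() {
        Configuration conf = new Configuration();
        // "local" selects the in-process LocalJobRunner; "yarn" would submit
        // the job to a YARN cluster instead.
        conf.set("mapreduce.framework.name", "local");
        return conf;
    }
}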
My project is a plain Java project, and I am trying to simply execute it (this worked when I was on 0.20.2). Here is a code sample:
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;

import org.apache.commons.configuration.ConfigurationException;
import org.apache.commons.configuration.PropertiesConfiguration;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class HadoopTool extends Configured implements Tool {

    @Override
    public int run(String[] args) throws Exception {
        PropertiesConfiguration props = getConfigurationProperties(args);
        Configuration conf = getJobConfiguration(props);
        Job job = Job.getInstance(conf);
        jobSetup(props, job);
        if (!job.waitForCompletion(true)) {
            System.out.println("Error in hadoop tool");
            return 1;
        }
        System.out.println("Finished hadoop tool successfully");
        return 0;
    }

    /**
     * @param props the loaded job properties
     * @param job   the job to configure
     * @throws IOException
     * @throws ClassNotFoundException
     */
    @SuppressWarnings("unchecked")
    private void jobSetup(PropertiesConfiguration props, Job job) throws IOException, ClassNotFoundException {
        job.setJobName("Hadoop tool");
        // "input.dir", "output.dir" and "output.format.class" are hypothetical
        // property keys standing in for the values elided in the original post.
        String inputDir = props.getString("input.dir");
        FileInputFormat.addInputPaths(job, inputDir);
        String outputDir = props.getString("output.dir");
        FileOutputFormat.setOutputPath(job, new Path(outputDir));
        job.setNumReduceTasks(1);
        job.setJarByClass(HadoopTool.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setMapperClass(HadoopMapper.class);
        job.setMapOutputKeyClass(IdNamePair.class);
        job.setMapOutputValueClass(Text.class);
        job.setGroupingComparatorClass(Grouper.class);
        job.setPartitionerClass(Partitioner.class);
        job.setReducerClass(Reducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NameValuePair.class);
        // The output format class name comes from configuration and is loaded
        // reflectively.
        String outputClassName = props.getString("output.format.class");
        Class<?> outputClass = Class.forName(outputClassName);
        job.setOutputFormatClass((Class<? extends OutputFormat>) outputClass);
    }

    /**
     * @param props the loaded job properties
     * @return the Hadoop job configuration
     * @throws IllegalAccessException
     * @throws InstantiationException
     * @throws ClassNotFoundException
     * @throws IOException
     */
    private Configuration getJobConfiguration(PropertiesConfiguration props) throws IllegalAccessException,
            InstantiationException, ClassNotFoundException, IOException {
        Configuration conf = getConf();
        // When running on Windows, this must be done before the FileSystem is
        // first acquired.
        if (org.apache.lucene.util.Constants.WINDOWS) {
            conf.set("fs.file.impl", IgnoreSetPermissionsFileSystem.class.getName());
        }
        conf.setBoolean("mapred.mapper.new-api", true);
        // Note: these are the old property names; in Hadoop 2.x they are
        // deprecated in favor of mapreduce.map.speculative and
        // mapreduce.reduce.speculative (the old names are still mapped).
        conf.setBoolean("mapred.map.tasks.speculative.execution", false);
        conf.setBoolean("mapred.reduce.tasks.speculative.execution", false);
        return conf;
    }

    /**
     * @param args command-line arguments; args[0] is the properties file path
     * @return the loaded job properties
     * @throws FileNotFoundException
     * @throws ConfigurationException
     * @throws IOException
     */
    private PropertiesConfiguration getConfigurationProperties(String[] args) throws FileNotFoundException,
            ConfigurationException, IOException {
        PropertiesConfiguration props = new PropertiesConfiguration();
        final InputStream in = new FileInputStream(new File(args[0]));
        try {
            props.load(in);
        } finally {
            in.close();
        }
        return props;
    }

    public static int runHadoopTool(String[] args) throws Exception {
        Configuration conf = new Configuration();
        int runRes = ToolRunner.run(conf, new HadoopTool(), args);
        return runRes;
    }

    public static void main(String[] args) throws Exception {
        if (args.length == 0) {
            System.err.println("Usage: " + HadoopTool.class.getName() + " [config file]");
            System.exit(1);
        }
        int runRes = HadoopTool.runHadoopTool(args);
        System.exit(runRes);
    }
}
I would really appreciate your help. Thank you,
Shay