This article collects a number of Java code examples for the org.apache.hadoop.mapreduce.OutputFormat class and shows how the class is used in practice. The examples were extracted from selected projects, mainly on GitHub, Stack Overflow, Maven, and similar platforms, so they should serve as useful references. Details of the OutputFormat class:

Package path: org.apache.hadoop.mapreduce
Class name: OutputFormat

[Javadoc] OutputFormat describes the output-specification for a Map-Reduce job. The Map-Reduce framework relies on the OutputFormat of the job to:
1. Validate the output-specification of the job; for example, check that the output directory does not already exist.
2. Provide the RecordWriter implementation used to write out the output files of the job.
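Before the collected examples, here is a minimal sketch of what a concrete OutputFormat must supply: a spec check, a RecordWriter, and an OutputCommitter. The class below is hypothetical (it simply discards records, in the spirit of Hadoop's NullOutputFormat) and is not taken from any of the projects quoted here.

import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

// Hypothetical example: an OutputFormat that discards every record. It exists
// only to show the three methods a concrete OutputFormat must provide.
public class DiscardingOutputFormat<K, V> extends OutputFormat<K, V> {

  @Override
  public void checkOutputSpecs(JobContext context) {
    // 1. Validate the output-specification, e.g. fail early if the
    //    destination already exists. Nothing to check here.
  }

  @Override
  public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) {
    // 2. Provide the RecordWriter that writes (here: drops) the output.
    return new RecordWriter<K, V>() {
      @Override public void write(K key, V value) { /* discard */ }
      @Override public void close(TaskAttemptContext ctx) { }
    };
  }

  @Override
  public OutputCommitter getOutputCommitter(TaskAttemptContext context) {
    // 3. Provide the committer; file-based formats typically return a
    //    FileOutputCommitter for the job's output directory.
    return new OutputCommitter() {
      @Override public void setupJob(JobContext jobContext) { }
      @Override public void setupTask(TaskAttemptContext taskContext) { }
      @Override public boolean needsTaskCommit(TaskAttemptContext taskContext) { return false; }
      @Override public void commitTask(TaskAttemptContext taskContext) { }
      @Override public void abortTask(TaskAttemptContext taskContext) { }
    };
  }
}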
Code example source: origin: apache/hive
/**
 * Get the output committer for this output format. This is responsible
 * for ensuring the output is committed correctly.
 * @param context the task context
 * @return an output committer
 * @throws IOException
 * @throws InterruptedException
 */
@Override
public OutputCommitter getOutputCommitter(TaskAttemptContext context)
    throws IOException, InterruptedException {
  return getOutputFormat(context).getOutputCommitter(context);
}
Code example source: origin: apache/avro

@SuppressWarnings("unchecked")
private synchronized RecordWriter getRecordWriter(
    TaskAttemptContext taskContext, String baseFileName)
    throws IOException, InterruptedException {
  // look for record-writer in the cache
  RecordWriter writer = recordWriters.get(baseFileName);
  // If not in cache, create a new one
  if (writer == null) {
    // get the record writer from context output format
    //FileOutputFormat.setOutputName(taskContext, baseFileName);
    taskContext.getConfiguration().set("avro.mo.config.namedOutput", baseFileName);
    try {
      writer = ((OutputFormat) ReflectionUtils.newInstance(
          taskContext.getOutputFormatClass(), taskContext.getConfiguration()))
          .getRecordWriter(taskContext);
    } catch (ClassNotFoundException e) {
      throw new IOException(e);
    }
    // if counters are enabled, wrap the writer with context
    // to increment counters
    if (countersEnabled) {
      writer = new RecordWriterWithCounter(writer, baseFileName, context);
    }
    // add the record-writer to the cache
    recordWriters.put(baseFileName, writer);
  }
  return writer;
}
Code example source: origin: apache/hive

/**
 * Check for validity of the output-specification for the job.
 * @param context information about the job
 * @throws IOException when output should not be attempted
 */
@Override
public void checkOutputSpecs(JobContext context)
    throws IOException, InterruptedException {
  getOutputFormat(context).checkOutputSpecs(context);
}
Code example source: origin: apache/flink

// Fragment of Flink's HadoopOutputFormatBase#open(): propagate credentials,
// then obtain the OutputCommitter and RecordWriter from the wrapped
// OutputFormat. The surrounding method body is omitted in this excerpt.
this.context.getCredentials().addAll(this.credentials);
Credentials currentUserCreds = getCredentialsFromUGI(UserGroupInformation.getCurrentUser());
if (currentUserCreds != null) {
  this.context.getCredentials().addAll(currentUserCreds);
}

this.outputCommitter = this.mapreduceOutputFormat.getOutputCommitter(this.context);
this.outputCommitter.setupJob(new JobContextImpl(this.configuration, new JobID()));

try {
  this.recordWriter = this.mapreduceOutputFormat.getRecordWriter(this.context);
} catch (InterruptedException e) {
  throw new IOException("Could not create RecordWriter.", e);
}
Code example source: origin: apache/ignite

/**
 * Put writer into Hadoop context and return associated output format instance.
 *
 * @param jobCtx Job context.
 * @return Output format.
 * @throws IgniteCheckedException In case of Grid exception.
 * @throws InterruptedException In case of interrupt.
 */
protected OutputFormat prepareWriter(JobContext jobCtx)
    throws IgniteCheckedException, InterruptedException {
  try {
    OutputFormat outputFormat = getOutputFormat(jobCtx);

    assert outputFormat != null;

    OutputCommitter outCommitter = outputFormat.getOutputCommitter(hadoopCtx);

    if (outCommitter != null)
      outCommitter.setupTask(hadoopCtx);

    RecordWriter writer = outputFormat.getRecordWriter(hadoopCtx);

    hadoopCtx.writer(writer);

    return outputFormat;
  }
  catch (IOException | ClassNotFoundException e) {
    throw new IgniteCheckedException(e);
  }
}
Code example source: origin: apache/ignite

/** {@inheritDoc} */
@Override protected void run0(HadoopV2TaskContext taskCtx) throws IgniteCheckedException {
  try {
    JobContextImpl jobCtx = taskCtx.jobContext();

    OutputFormat outputFormat = getOutputFormat(jobCtx);

    outputFormat.checkOutputSpecs(jobCtx);

    OutputCommitter committer = outputFormat.getOutputCommitter(hadoopContext());

    if (committer != null)
      committer.setupJob(jobCtx);
  }
  catch (ClassNotFoundException | IOException e) {
    throw new IgniteCheckedException(e);
  }
  catch (InterruptedException e) {
    Thread.currentThread().interrupt();

    throw new IgniteInterruptedCheckedException(e);
  }
}
Code example source: origin: org.apache.hadoop/hadoop-mapreduce-client-common

private org.apache.hadoop.mapreduce.OutputCommitter
    createOutputCommitter(boolean newApiCommitter, JobID jobId, Configuration conf) throws Exception {
  org.apache.hadoop.mapreduce.OutputCommitter committer = null;

  LOG.info("OutputCommitter set in config "
      + conf.get("mapred.output.committer.class"));

  if (newApiCommitter) {
    org.apache.hadoop.mapreduce.TaskID taskId =
        new org.apache.hadoop.mapreduce.TaskID(jobId, TaskType.MAP, 0);
    org.apache.hadoop.mapreduce.TaskAttemptID taskAttemptID =
        new org.apache.hadoop.mapreduce.TaskAttemptID(taskId, 0);
    org.apache.hadoop.mapreduce.TaskAttemptContext taskContext =
        new TaskAttemptContextImpl(conf, taskAttemptID);
    OutputFormat outputFormat =
        ReflectionUtils.newInstance(taskContext.getOutputFormatClass(), conf);
    committer = outputFormat.getOutputCommitter(taskContext);
  } else {
    committer = ReflectionUtils.newInstance(conf.getClass(
        "mapred.output.committer.class", FileOutputCommitter.class,
        org.apache.hadoop.mapred.OutputCommitter.class), conf);
  }
  LOG.info("OutputCommitter is " + committer.getClass().getName());
  return committer;
}
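Which branch runs above depends on how the job was configured: with the new API the committer comes from the configured OutputFormat's getOutputCommitter(), while the legacy mapred.output.committer.class key only matters for the old mapred API. A minimal driver sketch of the new-API side follows; the class name and output path are hypothetical.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

// Hypothetical driver: taskContext.getOutputFormatClass() will return the
// class set here, and the committer is obtained from that OutputFormat.
public class DriverSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "example");
    job.setOutputFormatClass(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path("/tmp/out")); // hypothetical path
  }
}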
Code example source: origin: cdapio/cdap

@Override
public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {
  for (String name : MultipleOutputs.getNamedOutputsList(context)) {
    Class<? extends OutputFormat> namedOutputFormatClass =
        MultipleOutputs.getNamedOutputFormatClass(context, name);
    JobContext namedContext = MultipleOutputs.getNamedJobContext(context, name);

    OutputFormat<K, V> outputFormat =
        ReflectionUtils.newInstance(namedOutputFormatClass, namedContext.getConfiguration());
    outputFormat.checkOutputSpecs(namedContext);
  }
}
Code example source: origin: apache/hive

public MultiRecordWriter(TaskAttemptContext context) throws IOException,
    InterruptedException {
  baseRecordWriters = new LinkedHashMap<String, BaseRecordWriterContainer>();
  String[] aliases = getOutputFormatAliases(context);
  for (String alias : aliases) {
    LOGGER.info("Creating record writer for alias: " + alias);
    TaskAttemptContext aliasContext = getTaskAttemptContext(alias, context);
    Configuration aliasConf = aliasContext.getConfiguration();
    // Create output directory if not already created.
    String outDir = aliasConf.get("mapred.output.dir");
    if (outDir != null) {
      Path outputDir = new Path(outDir);
      FileSystem fs = outputDir.getFileSystem(aliasConf);
      if (!fs.exists(outputDir)) {
        fs.mkdirs(outputDir);
      }
    }
    OutputFormat<?, ?> outputFormat = getOutputFormatInstance(aliasContext);
    baseRecordWriters.put(alias,
        new BaseRecordWriterContainer(outputFormat.getRecordWriter(aliasContext),
            aliasContext));
  }
}
Code example source: origin: org.apache.crunch/crunch-core

public static OutputCommitter getOutputCommitter(TaskAttemptContext tac) throws IOException, InterruptedException {
  Map<String, OutputConfig> outputs = getNamedOutputs(tac.getConfiguration());
  Map<String, OutputCommitter> committers = Maps.newHashMap();
  for (Map.Entry<String, OutputConfig> e : outputs.entrySet()) {
    String namedOutput = e.getKey();
    Job job = getJob(tac.getJobID(), e.getKey(), tac.getConfiguration());
    OutputFormat fmt = getOutputFormat(namedOutput, job, e.getValue());
    TaskAttemptContext taskContext = getTaskContext(tac, job);
    OutputCommitter oc = fmt.getOutputCommitter(taskContext);
    committers.put(namedOutput, oc);
  }
  return new CompositeOutputCommitter(outputs, committers);
}
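The CompositeOutputCommitter returned above fans every lifecycle call out to the per-output committers it collects. Crunch's actual class is not reproduced here; the following is a hedged sketch of that delegation pattern using only core Hadoop types, with an invented class name.

import java.io.IOException;
import java.util.Collection;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

// Hypothetical committer that forwards each lifecycle call to its children,
// mirroring what a composite committer over named outputs must do.
class DelegatingCommitter extends OutputCommitter {
  private final Collection<OutputCommitter> children;

  DelegatingCommitter(Collection<OutputCommitter> children) {
    this.children = children;
  }

  @Override public void setupJob(JobContext ctx) throws IOException {
    for (OutputCommitter c : children) c.setupJob(ctx);
  }
  @Override public void commitJob(JobContext ctx) throws IOException {
    for (OutputCommitter c : children) c.commitJob(ctx);
  }
  @Override public void setupTask(TaskAttemptContext ctx) throws IOException {
    for (OutputCommitter c : children) c.setupTask(ctx);
  }
  @Override public boolean needsTaskCommit(TaskAttemptContext ctx) throws IOException {
    // The composite needs a task commit if any child does.
    for (OutputCommitter c : children) {
      if (c.needsTaskCommit(ctx)) return true;
    }
    return false;
  }
  @Override public void commitTask(TaskAttemptContext ctx) throws IOException {
    for (OutputCommitter c : children) c.commitTask(ctx);
  }
  @Override public void abortTask(TaskAttemptContext ctx) throws IOException {
    for (OutputCommitter c : children) c.abortTask(ctx);
  }
}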
Code example source: origin: apache/hive

/**
 * Get the record writer for the job. This uses the StorageHandler's default
 * OutputFormat to get the record writer.
 * @param context the information about the current task
 * @return a RecordWriter to write the output for the job
 * @throws IOException
 * @throws InterruptedException
 */
@Override
public RecordWriter<WritableComparable<?>, HCatRecord>
    getRecordWriter(TaskAttemptContext context)
    throws IOException, InterruptedException {
  return getOutputFormat(context).getRecordWriter(context);
}
Code example source: origin: seznam/euphoria

@Override
public void commit() throws IOException {
  try {
    final TaskAttemptContext cleanupContext =
        HadoopUtils.createCleanupTaskContext(conf.get(), jobID.get());
    getOutputFormat(cleanupContext.getTaskAttemptID())
        .getOutputCommitter(cleanupContext)
        .commitJob(cleanupContext);
  } catch (Exception e) {
    throw new IOException("Unable to commit output", e);
  }
}
Code example source: origin: seznam/euphoria

@Override
public HadoopWriter<K, V> openWriter(int partitionId) {
  try {
    final TaskAttemptContext taskContext =
        HadoopUtils.createTaskContext(conf.get(), jobID.get(), partitionId);

    final OutputFormat<K, V> outputFormat =
        getOutputFormat(taskContext.getTaskAttemptID());

    return new HadoopWriter<>(
        outputFormat.getRecordWriter(taskContext),
        outputFormat.getOutputCommitter(taskContext),
        taskContext);
  } catch (Exception ex) {
    throw new RuntimeException(ex);
  }
}
Code example source: origin: apache/attic-mrunit

@SuppressWarnings({ "unchecked", "rawtypes" })
@Override
public void collect(K key, V value) throws IOException {
  try {
    if (recordWriter == null) {
      if (taskAttemptContext.getOutputKeyClass() == null) {
        when(taskAttemptContext.getOutputKeyClass()).thenReturn((Class) key.getClass());
      }
      if (taskAttemptContext.getOutputValueClass() == null) {
        when(taskAttemptContext.getOutputValueClass()).thenReturn((Class) value.getClass());
      }
      if (taskAttemptContext.getTaskAttemptID() == null) {
        when(taskAttemptContext.getTaskAttemptID()).thenReturn(TASK_ID);
      }
      recordWriter = outputFormat.getRecordWriter(taskAttemptContext);
    }
    recordWriter.write(key, value);
  } catch (InterruptedException e) {
    throw new IllegalStateException(e);
  }
}
Code example source: origin: seznam/euphoria

@Override
public void initialize() {
  ExceptionUtils.unchecked(() -> {
    final TaskAttemptContext setupContext =
        HadoopUtils.createSetupTaskContext(conf.get(), jobID.get());
    final OutputFormat<K, V> outputFormat =
        getOutputFormat(setupContext.getTaskAttemptID());
    // Check for validity of the output-specification for the job.
    outputFormat.checkOutputSpecs(setupContext);
    // Setup the job output.
    outputFormat.getOutputCommitter(setupContext).setupJob(setupContext);
  });
}
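Taken together, the three seznam/euphoria snippets (initialize, openWriter, commit) walk the full OutputFormat lifecycle. The condensed sketch below drives the same sequence against an arbitrary OutputFormat outside the MapReduce framework; all names are illustrative, and a real runtime would create one task context per parallel writer.

import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskID;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.task.JobContextImpl;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

// Hypothetical single-task walk through the OutputFormat lifecycle.
public class LifecycleSketch {
  public static <K, V> void runOnce(OutputFormat<K, V> fmt, Configuration conf,
                                    Map<K, V> records) throws Exception {
    JobID jobId = new JobID("sketch", 0); // illustrative ids
    TaskAttemptID taskId =
        new TaskAttemptID(new TaskID(jobId, TaskType.REDUCE, 0), 0);
    JobContext jobCtx = new JobContextImpl(conf, jobId);
    TaskAttemptContext taskCtx = new TaskAttemptContextImpl(conf, taskId);

    fmt.checkOutputSpecs(jobCtx);                             // 1. validate the output spec
    OutputCommitter committer = fmt.getOutputCommitter(taskCtx);
    committer.setupJob(jobCtx);                               // 2. job-level setup
    committer.setupTask(taskCtx);                             // 3. task-level setup

    RecordWriter<K, V> writer = fmt.getRecordWriter(taskCtx); // 4. write the records
    for (Map.Entry<K, V> r : records.entrySet()) {
      writer.write(r.getKey(), r.getValue());
    }
    writer.close(taskCtx);

    if (committer.needsTaskCommit(taskCtx)) {                 // 5. commit the task output...
      committer.commitTask(taskCtx);
    }
    committer.commitJob(jobCtx);                              // 6. ...then the whole job
  }
}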
Code example source: origin: org.apache.beam/beam-sdks-java-io-hadoop-format

private RecordWriter<KeyT, ValueT> initRecordWriter(
    OutputFormat<KeyT, ValueT> outputFormatObj, TaskAttemptContext taskAttemptContext)
    throws IllegalStateException {
  try {
    LOGGER.info(
        "Creating new RecordWriter for task {} of Job with id {}.",
        taskAttemptContext.getTaskAttemptID().getTaskID().getId(),
        taskAttemptContext.getJobID().getJtIdentifier());
    return outputFormatObj.getRecordWriter(taskAttemptContext);
  } catch (InterruptedException | IOException e) {
    throw new IllegalStateException("Unable to create RecordWriter object: ", e);
  }
}
Code example source: origin: org.apache.beam/beam-sdks-java-io-hadoop-format

private static OutputCommitter initOutputCommitter(
    OutputFormat<?, ?> outputFormatObj,
    Configuration conf,
    TaskAttemptContext taskAttemptContext)
    throws IllegalStateException {
  OutputCommitter outputCommitter;
  try {
    outputCommitter = outputFormatObj.getOutputCommitter(taskAttemptContext);
    if (outputCommitter != null) {
      outputCommitter.setupJob(new JobContextImpl(conf, taskAttemptContext.getJobID()));
    }
  } catch (Exception e) {
    throw new IllegalStateException("Unable to create OutputCommitter object: ", e);
  }
  return outputCommitter;
}