This article collects code examples for the Java class org.apache.hadoop.mapred.OutputFormat and shows how the class is used in practice. The examples come mainly from selected open-source projects on GitHub, Stack Overflow, Maven, and similar platforms, so they have real reference value and should be helpful. Details of the OutputFormat class are as follows:
Package path: org.apache.hadoop.mapred.OutputFormat
Class name: OutputFormat
OutputFormat describes the output-specification for a Map-Reduce job. The Map-Reduce framework relies on the OutputFormat of the job to:
1. Validate the output-specification of the job, for example to check that the output directory does not already exist.
2. Provide the RecordWriter implementation used to write out the output files of the job; output files are stored in a FileSystem.
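Before the project excerpts, here is a minimal sketch of what implementing this interface looks like. It is not taken from any of the projects below; the class name TabSeparatedOutputFormat and the tab-separated line format are illustrative assumptions only.

import java.io.IOException;

import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputFormat;
import org.apache.hadoop.mapred.RecordWriter;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.Progressable;

// Illustrative sketch only: writes each record as a "key<TAB>value" line under the job's output directory.
public class TabSeparatedOutputFormat<K, V> implements OutputFormat<K, V> {

  @Override
  public void checkOutputSpecs(FileSystem ignored, JobConf job) throws IOException {
    // Validate the output specification: here, refuse to overwrite an existing output directory.
    Path outDir = FileOutputFormat.getOutputPath(job);
    if (outDir == null) {
      throw new IOException("Output directory not set in JobConf");
    }
    FileSystem fs = outDir.getFileSystem(job);
    if (fs.exists(outDir)) {
      throw new IOException("Output directory " + outDir + " already exists");
    }
  }

  @Override
  public RecordWriter<K, V> getRecordWriter(FileSystem ignored, JobConf job,
      String name, Progressable progress) throws IOException {
    // One output file per task; "name" is the per-task file name, e.g. "part-00000".
    Path file = new Path(FileOutputFormat.getOutputPath(job), name);
    FileSystem fs = file.getFileSystem(job);
    final FSDataOutputStream out = fs.create(file, progress);
    return new RecordWriter<K, V>() {
      @Override
      public void write(K key, V value) throws IOException {
        out.writeBytes(key + "\t" + value + "\n");
      }

      @Override
      public void close(Reporter reporter) throws IOException {
        out.close();
      }
    };
  }
}

In the excerpts that follow, checkOutputSpecs is what the framework calls at job-submission time (see the JobClient snippets), while getRecordWriter is called from each task to obtain the writer for that task's output file.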
Code example source: apache/avro
private synchronized RecordWriter getRecordWriter(String namedOutput,
    String baseFileName, final Reporter reporter, Schema schema)
    throws IOException {
  RecordWriter writer = recordWriters.get(baseFileName);
  if (writer == null) {
    if (countersEnabled && reporter == null) {
      throw new IllegalArgumentException(
          "Counters are enabled, Reporter cannot be NULL");
    }
    if (schema != null)
      conf.set(MO_PREFIX + namedOutput + ".schema", schema.toString());
    JobConf jobConf = new JobConf(conf);
    jobConf.set(InternalFileOutputFormat.CONFIG_NAMED_OUTPUT, namedOutput);
    FileSystem fs = FileSystem.get(conf);
    writer = outputFormat.getRecordWriter(fs, jobConf, baseFileName, reporter);
    if (countersEnabled) {
      if (reporter == null) {
        throw new IllegalArgumentException(
            "Counters are enabled, Reporter cannot be NULL");
      }
      writer = new RecordWriterWithCounter(writer, baseFileName, reporter);
    }
    recordWriters.put(baseFileName, writer);
  }
  return writer;
}
Code example source: Qihoo360/XLearning
reader = new BufferedReader(new InputStreamReader(xlearningProcess.getInputStream()));
List<OutputInfo> outputs = Arrays.asList(amClient.getOutputLocation());
JobConf jobConf = new JobConf(conf);
jobConf.setOutputKeyClass(Text.class);
jobConf.setOutputValueClass(Text.class);
jobConf.setBoolean("mapred.output.compress", true);
jobConf.set("mapred.output.compression.codec", "org.apache.hadoop.io.compress.GzipCodec");
OutputFormat outputFormat = ReflectionUtils.newInstance(
    conf.getClass(XLearningConfiguration.XLEARNING_OUTPUTFORMAT_CLASS,
        XLearningConfiguration.DEFAULT_XLEARNING_OUTPUTF0RMAT_CLASS, OutputFormat.class),
    jobConf);
outputFormat.checkOutputSpecs(dfs, jobConf);
JobID jobID = new JobID(new SimpleDateFormat("yyyyMMddHHmm").format(new Date()), 0);
TaskAttemptID taId = new TaskAttemptID(new TaskID(jobID, true, 0), 0);
jobConf.set("mapred.job.id", jobID.toString());
amClient.reportMapedTaskID(containerId, taId.toString());
RecordWriter writer = outputFormat.getRecordWriter(dfs, jobConf, "part-r", Reporter.NULL);
String xlearningStreamResultLine;
while ((xlearningStreamResultLine = reader.readLine()) != null) {
  dfs.close();
} catch (Exception e) {
  LOG.warn("Exception in thread stdoutRedirectThread");
Code example source: org.apache.hadoop/hadoop-mapred
@SuppressWarnings("unchecked")
public DirectMapOutputCollector(TaskUmbilicalProtocol umbilical,
    JobConf job, TaskReporter reporter) throws IOException {
  this.reporter = reporter;
  String finalName = getOutputName(getPartition());
  FileSystem fs = FileSystem.get(job);
  out = job.getOutputFormat().getRecordWriter(fs, job, finalName, reporter);
  mapOutputRecordCounter = reporter.getCounter(TaskCounter.MAP_OUTPUT_RECORDS);
}
Code example source: ch.cern.hadoop/hadoop-mapreduce-client-core
@SuppressWarnings("unchecked")
private void createRecordWriter() throws IOException {
  FileSystem fs = FileSystem.get(job);
  rawWriter = of.getRecordWriter(fs, job, name, progress);
}
}
Code example source: org.jvnet.hudson.hadoop/hadoop-core
public void run(JobConf job, final TaskUmbilicalProtocol umbilical)
    throws IOException {
  job.setBoolean("mapred.skip.on", isSkipping());
  boolean isLocal = "local".equals(job.get("mapred.job.tracker", "local"));
  if (!isLocal) {
    reduceCopier = new ReduceCopier(umbilical, job);
  statusUpdate(umbilical);
  final FileSystem rfs = FileSystem.getLocal(job).getRaw();
  RawKeyValueIterator rIter = isLocal
      ? Merger.merge(job, rfs, job.getMapOutputKeyClass(),
          job.getMapOutputValueClass(), codec, getMapFiles(rfs, true),
          !conf.getKeepFailedTaskFiles(), job.getInt("io.sort.factor", 100),
  FileSystem fs = FileSystem.get(job);
  job.getOutputFormat().getRecordWriter(fs, job, finalName, reporter);
Code example source: com.github.jiayuhan-it/hadoop-mapreduce-client-core
@SuppressWarnings({"unchecked"})
public RecordWriter<Object, Object> getRecordWriter(
FileSystem fs, JobConf job, String baseFileName, Progressable progress)
throws IOException {
String nameOutput = job.get(CONFIG_NAMED_OUTPUT, null);
String fileName = getUniqueName(job, baseFileName);
// The following trick leverages the instantiation of a record writer via
// the job conf thus supporting arbitrary output formats.
JobConf outputConf = new JobConf(job);
outputConf.setOutputFormat(getNamedOutputFormatClass(job, nameOutput));
outputConf.setOutputKeyClass(getNamedOutputKeyClass(job, nameOutput));
outputConf.setOutputValueClass(getNamedOutputValueClass(job, nameOutput));
OutputFormat outputFormat = outputConf.getOutputFormat();
return outputFormat.getRecordWriter(fs, outputConf, fileName, progress);
}
}
Code example source: com.facebook.hadoop/hadoop-core
    Class<INVALUE> valueClass) throws IOException {
  Reducer<INKEY,INVALUE,OUTKEY,OUTVALUE> reducer =
      ReflectionUtils.newInstance(job.getReducerClass(), job);
  FileSystem fs = FileSystem.get(job);
  job.getOutputFormat().getRecordWriter(fs, job, finalName, reporter);
          job, reporter, umbilical) :
      new ReduceValuesIterator<INKEY,INVALUE>(rIter,
          job.getOutputValueGroupingComparator(), keyClass, valueClass,
          job, reporter);
  values.informReduceProgress();
Code example source: com.facebook.hadoop/hadoop-core
boolean shared = job.getBoolean("mapred.cache.shared.enabled", false);
int reduces = job.getNumReduceTasks();
JobContext context = new JobContext(job, jobId);
if (reduces == 0 ? job.getUseNewMapper() : job.getUseNewReducer()) {
  org.apache.hadoop.mapreduce.OutputFormat<?,?> output =
      ReflectionUtils.newInstance(context.getOutputFormatClass(), job);
  output.checkOutputSpecs(context);
} else {
  job.getOutputFormat().checkOutputSpecs(fs, job);
LOG.debug("Creating splits at " + fs.makeQualified(submitSplitFile));
List<RawSplit> maps;
if (job.getUseNewMapper()) {
FileSystem.create(fs, submitJobFile,
    new FsPermission(JOB_FILE_PERMISSION));
try {
Code example source: org.jvnet.hudson.hadoop/hadoop-core
job.getOutputFormat().checkOutputSpecs(fs, job);
LOG.debug("Creating splits at " + fs.makeQualified(submitSplitFile));
InputSplit[] splits =
    job.getInputFormat().getSplits(job, job.getNumMapTasks());
FSDataOutputStream out = FileSystem.create(fs,
    submitSplitFile, new FsPermission(JOB_FILE_PERMISSION));
try {
  out = FileSystem.create(fs, submitJobFile,
      new FsPermission(JOB_FILE_PERMISSION));
Code example source: apache/hive
/**
 * Check for validity of the output-specification for the job.
 * @param context information about the job
 * @throws IOException when output should not be attempted
 */
@Override
public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {
  org.apache.hadoop.mapred.OutputFormat<? super WritableComparable<?>, ? super Writable> outputFormat = getBaseOutputFormat();
  JobConf jobConf = new JobConf(context.getConfiguration());
  outputFormat.checkOutputSpecs(null, jobConf);
  HCatUtil.copyConf(jobConf, context.getConfiguration());
}
Code example source: apache/hive
/**
 * Get the record writer for the job. Uses the storagehandler's OutputFormat
 * to get the record writer.
 * @param context the information about the current task.
 * @return a RecordWriter to write the output for the job.
 * @throws IOException
 */
@Override
public RecordWriter<WritableComparable<?>, HCatRecord>
    getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException {
  String name = getOutputName(context);
  return new DefaultRecordWriterContainer(context,
      getBaseOutputFormat().getRecordWriter(null, new JobConf(context.getConfiguration()), name, InternalUtil.createReporter(context)));
}
Code example source: apache/avro
@SuppressWarnings({"unchecked", "deprecation"})
public RecordWriter<Object, Object> getRecordWriter(FileSystem fs,JobConf job, String baseFileName, Progressable arg3) throws IOException {
String nameOutput = job.get(CONFIG_NAMED_OUTPUT, null);
String fileName = getUniqueName(job, baseFileName);
Schema schema = null;
String schemastr = job.get(MO_PREFIX+nameOutput+".schema",null);
if (schemastr!=null)
schema = Schema.parse(schemastr);
JobConf outputConf = new JobConf(job);
outputConf.setOutputFormat(getNamedOutputFormatClass(job, nameOutput));
boolean isMapOnly = job.getNumReduceTasks() == 0;
if (schema != null) {
if (isMapOnly)
AvroJob.setMapOutputSchema(outputConf, schema);
else
AvroJob.setOutputSchema(outputConf, schema);
}
OutputFormat outputFormat = outputConf.getOutputFormat();
return outputFormat.getRecordWriter(fs, outputConf, fileName, arg3);
}
}
Code example source: apache/hive
AbstractSerDe serde = new OrcSerde();
OutputFormat<?, ?> outFormat = new OrcOutputFormat();
conf.setInt("mapred.max.split.size", 50);
RecordWriter writer =
    outFormat.getRecordWriter(fs, conf, testFilePath.toString(),
        Reporter.NULL);
writer.write(NullWritable.get(),
    .end()
    .build();
conf.set("sarg.pushdown", toKryo(sarg));
conf.set("hive.io.file.readcolumn.names", "z");
properties.setProperty("columns", "z");
properties.setProperty("columns.types", "string");
Code example source: apache/flink
@Test
public void testOpen() throws Exception {
  OutputFormat<String, Long> dummyOutputFormat = mock(DummyOutputFormat.class);
  DummyOutputCommitter outputCommitter = mock(DummyOutputCommitter.class);
  JobConf jobConf = Mockito.spy(new JobConf());
  when(jobConf.getOutputCommitter()).thenReturn(outputCommitter);
  HadoopOutputFormat<String, Long> outputFormat = new HadoopOutputFormat<>(dummyOutputFormat, jobConf);
  outputFormat.open(1, 1);
  verify(jobConf, times(2)).getOutputCommitter();
  verify(outputCommitter, times(1)).setupJob(any(JobContext.class));
  verify(dummyOutputFormat, times(1)).getRecordWriter(nullable(FileSystem.class), any(JobConf.class), anyString(), any(Progressable.class));
}
Code example source: apache/flink
/**
 * create the temporary output file for hadoop RecordWriter.
 * @param taskNumber The number of the parallel instance.
 * @param numTasks The number of parallel tasks.
 * @throws java.io.IOException
 */
@Override
public void open(int taskNumber, int numTasks) throws IOException {
  // enforce sequential open() calls
  synchronized (OPEN_MUTEX) {
    if (Integer.toString(taskNumber + 1).length() > 6) {
      throw new IOException("Task id too large.");
    }
    TaskAttemptID taskAttemptID = TaskAttemptID.forName("attempt__0000_r_"
        + String.format("%" + (6 - Integer.toString(taskNumber + 1).length()) + "s", " ").replace(" ", "0")
        + Integer.toString(taskNumber + 1)
        + "_0");
    this.jobConf.set("mapred.task.id", taskAttemptID.toString());
    this.jobConf.setInt("mapred.task.partition", taskNumber + 1);
    // for hadoop 2.2
    this.jobConf.set("mapreduce.task.attempt.id", taskAttemptID.toString());
    this.jobConf.setInt("mapreduce.task.partition", taskNumber + 1);
    this.context = new TaskAttemptContextImpl(this.jobConf, taskAttemptID);
    this.outputCommitter = this.jobConf.getOutputCommitter();
    JobContext jobContext = new JobContextImpl(this.jobConf, new JobID());
    this.outputCommitter.setupJob(jobContext);
    this.recordWriter = this.mapredOutputFormat.getRecordWriter(null, this.jobConf, Integer.toString(taskNumber + 1), new HadoopDummyProgressable());
  }
}
Code example source: apache/ignite
/**
 * @param jobConf Job configuration.
 * @param taskCtx Task context.
 * @param directWrite Direct write flag.
 * @param fileName File name.
 * @throws IOException In case of IO exception.
 */
HadoopV1OutputCollector(JobConf jobConf, HadoopTaskContext taskCtx, boolean directWrite,
    @Nullable String fileName, TaskAttemptID attempt) throws IOException {
  this.jobConf = jobConf;
  this.taskCtx = taskCtx;
  this.attempt = attempt;
  if (directWrite) {
    jobConf.set("mapreduce.task.attempt.id", attempt.toString());
    OutputFormat outFormat = jobConf.getOutputFormat();
    writer = outFormat.getRecordWriter(null, jobConf, fileName, Reporter.NULL);
  }
  else
    writer = null;
}
Code example source: apache/attic-mrunit
@Override
public void collect(K key, V value) throws IOException {
  // only set if classes are unset to allow setting higher level class when
  // using multiple subtypes
  if (recordWriter == null) {
    setClassIfUnset("mapred.output.key.class", key.getClass());
    setClassIfUnset("mapred.output.value.class", value.getClass());
    recordWriter = outputFormat.getRecordWriter(
        FileSystem.getLocal(outputFormatConf), outputFormatConf,
        outputFile.getName(), Reporter.NULL);
  }
  recordWriter.write(key, value);
}
Code example source: apache/hive
OutputFormat<?, ?> outFormat = new OrcOutputFormat();
RecordWriter writer =
    outFormat.getRecordWriter(fs, conf, testFilePath.toString(),
        Reporter.NULL);
writer.write(NullWritable.get(),
assertEquals(1, splits.length);
ColumnProjectionUtils.appendReadColumns(conf, Collections.singletonList(1));
conf.set("columns", "z,r");
conf.set("columns.types", "int:struct<x:int,y:int>");
org.apache.hadoop.mapred.RecordReader reader =
    in.getRecordReader(splits[0], conf, Reporter.NULL);
Code example source: com.github.jiayuhan-it/hadoop-mapreduce-client-core
private void checkSpecs(Job job) throws ClassNotFoundException,
    InterruptedException, IOException {
  JobConf jConf = (JobConf)job.getConfiguration();
  // Check the output specification
  if (jConf.getNumReduceTasks() == 0 ?
      jConf.getUseNewMapper() : jConf.getUseNewReducer()) {
    org.apache.hadoop.mapreduce.OutputFormat<?, ?> output =
        ReflectionUtils.newInstance(job.getOutputFormatClass(),
            job.getConfiguration());
    output.checkOutputSpecs(job);
  } else {
    jConf.getOutputFormat().checkOutputSpecs(jtFs, jConf);
  }
}
Code example source: apache/hive
@Override
public org.apache.hadoop.mapred.RecordWriter<K, V> getRecordWriter(FileSystem ignored,
    JobConf job, String name, Progressable progress) throws IOException {
  return (RecordWriter<K, V>) actualOutputFormat.getRecordWriter(ignored,
      job, name, progress);
}