Usage of the org.apache.hadoop.hive.ql.io.orc.Reader.rowsOptions() method, with code examples


This article collects a number of Java code examples for the org.apache.hadoop.hive.ql.io.orc.Reader.rowsOptions method and shows how Reader.rowsOptions is used in practice. The examples come from selected open-source projects on platforms such as GitHub, Stack Overflow, and Maven, and should be useful as references. Details of the Reader.rowsOptions method are as follows:
Package path: org.apache.hadoop.hive.ql.io.orc.Reader
Class name: Reader
Method name: rowsOptions

About Reader.rowsOptions

Create a RecordReader that uses the options given. This method can't be named rows, because many callers used rows(null) before the rows() method was introduced.
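Before the project excerpts below, here is a minimal sketch of the basic call pattern, assuming a struct-typed ORC file already exists on disk. The class name RowsOptionsExample, the path /tmp/example.orc, and the three-element include mask are placeholders for illustration and are not taken from any of the referenced projects.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.orc.OrcFile;
import org.apache.hadoop.hive.ql.io.orc.Reader;
import org.apache.hadoop.hive.ql.io.orc.RecordReader;

public class RowsOptionsExample {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    // Placeholder path; point this at a real ORC file.
    Reader reader = OrcFile.createReader(new Path("/tmp/example.orc"),
        OrcFile.readerOptions(conf));
    // Read the whole file, including only the root struct and its first two columns;
    // the boolean mask must line up with the types in the file's schema.
    RecordReader rows = reader.rowsOptions(new Reader.Options()
        .range(0L, Long.MAX_VALUE)
        .include(new boolean[]{true, true, true}));
    Object row = null;
    while (rows.hasNext()) {
      row = rows.next(row);     // the reader may reuse the previous row object
      System.out.println(row);  // an OrcStruct for a struct schema
    }
    rows.close();
  }
}

Note that several of the excerpts below call a two-argument overload, rowsOptions(options, conf); which overloads are available depends on the Hive version, so check the Reader interface of the version you build against.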

Code examples

Code example source: apache/drill

public DeleteReaderValue(Reader deleteDeltaReader, Reader.Options readerOptions, int bucket,
  ValidTxnList validTxnList) throws IOException {
 this.recordReader  = deleteDeltaReader.rowsOptions(readerOptions);
 this.bucketForSplit = bucket;
 this.batch = deleteDeltaReader.getSchema().createRowBatch();
 if (!recordReader.nextBatch(batch)) { // Read the first batch.
  this.batch = null; // Oh! the first batch itself was null. Close the reader.
 }
 this.indexPtrInBatch = 0;
 this.validTxnList = validTxnList;
}

Code example source: apache/drill

/**
 * Create a reader that reads from the first key larger than minKey to any
 * keys equal to maxKey.
 * @param key the key to read into
 * @param reader the ORC file reader
 * @param bucket the bucket number for the file
 * @param minKey only return keys larger than minKey if it is non-null
 * @param maxKey only return keys less than or equal to maxKey if it is
 *               non-null
 * @param options options to provide to read the rows.
 * @param statementId id of SQL statement within a transaction
 * @throws IOException
 */
ReaderPair(ReaderKey key, Reader reader, int bucket,
      RecordIdentifier minKey, RecordIdentifier maxKey,
      ReaderImpl.Options options, int statementId) throws IOException {
 this.reader = reader;
 this.key = key;
 this.maxKey = maxKey;
 this.bucket = bucket;
 // TODO use stripe statistics to jump over stripes
 recordReader = reader.rowsOptions(options);
 this.statementId = statementId;
 // advance the reader until we reach the minimum key
 do {
  next(nextRecord);
 } while (nextRecord != null &&
   (minKey != null && key.compareRow(minKey) <= 0));
}

Code example source: apache/hive

@Override
public void next(OrcStruct next) throws IOException {
 while(true) {
  if(nextFromCurrentFile(next)) {
   return;
  } else {
   if (originalFiles.size() <= nextFileIndex) {
    //no more original files to read
    nextRecord = null;
    recordReader.close();
    return;
   } else {
    rowIdOffset += reader.getNumberOfRows();
    recordReader.close();
    reader = advanceToNextFile();
    if(reader == null) {
     nextRecord = null;
     return;
    }
    recordReader = reader.rowsOptions(options, conf);
   }
  }
 }
}

Code example source: apache/hive

/**
 * Create a reader that reads from the first key larger than minKey to any
 * keys equal to maxKey.
 * @param key the key to read into
 * @param reader the ORC file reader
 * @param minKey only return keys larger than minKey if it is non-null
 * @param maxKey only return keys less than or equal to maxKey if it is
 *               non-null
 * @param options options to provide to read the rows.
 * @param conf
 * @throws IOException
 */
@VisibleForTesting
ReaderPairAcid(ReaderKey key, Reader reader,
 RecordIdentifier minKey, RecordIdentifier maxKey,
 ReaderImpl.Options options, final Configuration conf) throws IOException {
 this.reader = reader;
 this.key = key;
 // TODO use stripe statistics to jump over stripes
 recordReader = reader.rowsOptions(options, conf);
 this.minKey = minKey;
 this.maxKey = maxKey;
 // advance the reader until we reach the minimum key
 do {
  next(nextRecord());
 } while (nextRecord() != null &&
  (minKey != null && key.compareRow(getMinKey()) <= 0));
}
@Override

Code example source: apache/hive

innerReader = reader.rowsOptions(readerOptions.range(offset, length), conf);
baseReader = new org.apache.hadoop.mapred.RecordReader<NullWritable, VectorizedRowBatch>() {

Code example source: apache/hive

private Reader createMockOriginalReader() throws IOException {
 Reader reader = Mockito.mock(Reader.class, settings);
 RecordReader recordReader = Mockito.mock(RecordReader.class, settings);
 OrcStruct row1 = createOriginalRow("first");
 OrcStruct row2 = createOriginalRow("second");
 OrcStruct row3 = createOriginalRow("third");
 OrcStruct row4 = createOriginalRow("fourth");
 OrcStruct row5 = createOriginalRow("fifth");
 Mockito.when(reader.rowsOptions(Mockito.any(Reader.Options.class), Mockito.any(HiveConf.class)))
   .thenReturn(recordReader);
 Mockito.when(recordReader.hasNext()).
   thenReturn(true, true, true, true, true, false);
 Mockito.when(recordReader.getRowNumber()).thenReturn(0L, 1L, 2L, 3L, 4L);
 Mockito.when(recordReader.next(null)).thenReturn(row1);
 Mockito.when(recordReader.next(row1)).thenReturn(row2);
 Mockito.when(recordReader.next(row2)).thenReturn(row3);
 Mockito.when(recordReader.next(row3)).thenReturn(row4);
 Mockito.when(recordReader.next(row4)).thenReturn(row5);
 return reader;
}

Code example source: apache/hive

DeleteReaderValue(Reader deleteDeltaReader, Path deleteDeltaFile,
  Reader.Options readerOptions, int bucket, ValidWriteIdList validWriteIdList,
  boolean isBucketedTable, final JobConf conf,
  OrcRawRecordMerger.KeyInterval keyInterval, OrcSplit orcSplit)
  throws IOException {
 this.reader = deleteDeltaReader;
 this.deleteDeltaFile = deleteDeltaFile;
 this.recordReader  = deleteDeltaReader.rowsOptions(readerOptions, conf);
 this.bucketForSplit = bucket;
 final boolean useDecimal64ColumnVector = HiveConf.getVar(conf, ConfVars
  .HIVE_VECTORIZED_INPUT_FORMAT_SUPPORTS_ENABLED).equalsIgnoreCase("decimal_64");
 if (useDecimal64ColumnVector) {
  this.batch = deleteDeltaReader.getSchema().createRowBatchV2();
 } else {
  this.batch = deleteDeltaReader.getSchema().createRowBatch();
 }
 if (!recordReader.nextBatch(batch)) { // Read the first batch.
  this.batch = null; // Oh! the first batch itself was null. Close the reader.
 }
 this.indexPtrInBatch = 0;
 this.validWriteIdList = validWriteIdList;
 this.isBucketedTable = isBucketedTable;
 if(batch != null) {
  checkBucketId();//check 1st batch
 }
 this.keyInterval = keyInterval;
 this.orcSplit = orcSplit;
 this.numEvents = deleteDeltaReader.getNumberOfRows();
 LOG.debug("Num events stats({},x,x)", numEvents);
}

Code example source: apache/hive

private Reader createMockReader() throws IOException {
 Reader reader = Mockito.mock(Reader.class, settings);
 RecordReader recordReader = Mockito.mock(RecordReader.class, settings);
 OrcStruct row1 = new OrcStruct(OrcRecordUpdater.FIELDS);
 setRow(row1, OrcRecordUpdater.INSERT_OPERATION, 10, 20, 20, 100, "first");
 OrcStruct row2 = new OrcStruct(OrcRecordUpdater.FIELDS);
 setRow(row2, OrcRecordUpdater.INSERT_OPERATION, 10, 20, 30, 110, "second");
 OrcStruct row3 = new OrcStruct(OrcRecordUpdater.FIELDS);
 setRow(row3, OrcRecordUpdater.INSERT_OPERATION, 10, 20, 40, 120, "third");
 OrcStruct row4 = new OrcStruct(OrcRecordUpdater.FIELDS);
 setRow(row4, OrcRecordUpdater.INSERT_OPERATION, 40, 50, 60, 130, "fourth");
 OrcStruct row5 = new OrcStruct(OrcRecordUpdater.FIELDS);
 setRow(row5, OrcRecordUpdater.INSERT_OPERATION, 40, 50, 61, 140, "fifth");
 Mockito.when(reader.rowsOptions(Mockito.any(Reader.Options.class), Mockito.any(HiveConf.class)))
   .thenReturn(recordReader);
 Mockito.when(recordReader.hasNext()).
   thenReturn(true, true, true, true, true, false);
 Mockito.when(recordReader.getProgress()).thenReturn(1.0f);
 Mockito.when(recordReader.next(null)).thenReturn(row1);
 Mockito.when(recordReader.next(row1)).thenReturn(row2);
 Mockito.when(recordReader.next(row2)).thenReturn(row3);
 Mockito.when(recordReader.next(row3)).thenReturn(row4);
 Mockito.when(recordReader.next(row4)).thenReturn(row5);
 return reader;
}

Code example source: apache/hive

" in " + mergerOptions.getRootPath());
recordReader = getReader().rowsOptions(options, conf);

Code example source: apache/hive

OrcInputFormat.setSearchArgument(options, types, conf, true);
this.reader = file.rowsOptions(options, conf);

Code example source: apache/hive

public static RecordReader createReaderFromFile(Reader file,
                        Configuration conf,
                        long offset, long length
                        ) throws IOException {
 if (AcidUtils.isFullAcidScan(conf)) {
  raiseAcidTablesMustBeReadWithAcidReaderException(conf);
 }
 /**
  * Do we have schema on read in the configuration variables?
  */
 TypeDescription schema = getDesiredRowTypeDescr(conf, false, Integer.MAX_VALUE);
 Reader.Options options = new Reader.Options(conf).range(offset, length);
 options.schema(schema);
 boolean isOriginal = isOriginal(file);
 if (schema == null) {
  schema = file.getSchema();
 }
 List<OrcProto.Type> types = OrcUtils.getOrcTypes(schema);
 options.include(genIncludedColumns(schema, conf));
 setSearchArgument(options, types, conf, isOriginal);
 return file.rowsOptions(options, conf);
}

Code example source: apache/drill

this.baseReader = reader.rowsOptions(readerOptions.range(offset, length));

Code example source: apache/drill

OrcInputFormat.setSearchArgument(options, types, conf, true);
this.reader = file.rowsOptions(options);

Code example source: apache/drill

public static RecordReader createReaderFromFile(Reader file,
                        Configuration conf,
                        long offset, long length
                        ) throws IOException {
 boolean isTransactionalTableScan = HiveConf.getBoolVar(conf, ConfVars.HIVE_TRANSACTIONAL_TABLE_SCAN);
 if (isTransactionalTableScan) {
  raiseAcidTablesMustBeReadWithAcidReaderException(conf);
 }
 /**
  * Do we have schema on read in the configuration variables?
  */
 TypeDescription schema = getDesiredRowTypeDescr(conf, false, Integer.MAX_VALUE);
 Reader.Options options = new Reader.Options().range(offset, length);
 options.schema(schema);
 boolean isOriginal = isOriginal(file);
 if (schema == null) {
  schema = file.getSchema();
 }
 List<OrcProto.Type> types = OrcUtils.getOrcTypes(schema);
 options.include(genIncludedColumns(schema, conf));
 setSearchArgument(options, types, conf, isOriginal);
 return file.rowsOptions(options);
}

Code example source: apache/hive

// Truncated excerpt: the stray .end()/.build() lines appear to be the tails of
// SearchArgument builder chains that precede each rowsOptions call; the builder
// details and the code between the calls were elided by the excerpting tool, so
// the statements below are not complete on their own.
  .end()
  .build();
RecordReader rows = reader.rowsOptions(new Reader.Options()
  .range(0L, Long.MAX_VALUE)
  .include(new boolean[]{true, true, true})
  // ... (remaining options and intervening code elided) ...
  .end()
  .build();
rows = reader.rowsOptions(new Reader.Options()
  .range(0L, Long.MAX_VALUE)
  .include(new boolean[]{true, true, true})
  // ... (remaining options and intervening code elided) ...
  .end()
  .build();
rows = reader.rowsOptions(new Reader.Options()
  .range(0L, Long.MAX_VALUE)
  .include(new boolean[]{true, true, true})
Code example source: apache/hive

Mockito.when(reader.rowsOptions(Mockito.any(Reader.Options.class), Mockito.any(HiveConf.class)))
  .thenReturn(recordReader);

Code example source: apache/hive

recordReader = reader.rowsOptions(options, conf);

Code example source: apache/hive

Reader reader = OrcFile.createReader(testFilePath,
 OrcFile.readerOptions(conf).filesystem(fs));
RecordReader rows = reader.rowsOptions(new Reader.Options()
 .schema(readerSchema));
batch = readerSchema.createRowBatchV2();
rows = reader.rowsOptions(new Reader.Options()
 .schema(readerSchema)
 .include(new boolean[]{false, true, true, true, false, false, true}));

Code example source: apache/hive

Reader reader = OrcFile.createReader(testFilePath,
  OrcFile.readerOptions(conf).filesystem(fs));
RecordReader rows = reader.rowsOptions(new Reader.Options()
  .schema(readerSchema));
batch = readerSchema.createRowBatch();
rows = reader.rowsOptions(new Reader.Options()
  .schema(readerSchema)
  .include(new boolean[]{false, true, true, true, false, false, true}));

Code example source: apache/hive

OrcFile.ReaderOptions qlReaderOptions = OrcFile.readerOptions(conf).maxLength(split.getFileLength());
Reader reader = OrcFile.createReader(split.getPath(), qlReaderOptions);
RecordReader recordReader = reader.rowsOptions(orcReaderOptions);
for(int j = 0; recordReader.hasNext(); j++) {
 long rowNum = (i * 5000) + j;
