This article collects Java code examples of the org.apache.spark.api.java.function.Function.call() method and shows how Function.call() is used in practice. The examples are extracted from selected projects on GitHub/Stack Overflow/Maven and should serve as useful references. Details of the Function.call() method:
Package path: org.apache.spark.api.java.function.Function
Class name: Function
Method name: call
Description: none provided.
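Before the project examples, here is a minimal, self-contained sketch (my own illustration, not taken from any of the projects below) of how a Function is defined and how call() is invoked, both directly and through a Spark transformation:

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;

import java.util.Arrays;

public class FunctionCallDemo {
    public static void main(String[] args) throws Exception {
        // Function<T, R> declares a single method: R call(T t) throws Exception.
        Function<String, Integer> lengthFn = s -> s.length();

        // call() can be invoked directly, e.g. from driver code or a test:
        System.out.println(lengthFn.call("spark")); // 5

        // More commonly the function is passed to a transformation, and Spark
        // invokes call() once per element on the executors:
        SparkConf conf = new SparkConf().setAppName("demo").setMaster("local[*]");
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            JavaRDD<Integer> lengths =
                    sc.parallelize(Arrays.asList("a", "bb", "ccc")).map(lengthFn);
            System.out.println(lengths.collect()); // [1, 2, 3]
        }
    }
}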
Code example source: apache/kylin

@Override
public Tuple2<K, V> next() {
    if (!hasNext()) {
        throw new NoSuchElementException();
    }
    final LinkedList<V> values = new LinkedList<>();
    K currentKey = current._1();
    values.add(current._2());
    // Collect all consecutive values that share the current key.
    while (input.hasNext()) {
        Tuple2<K, V> next = input.next();
        if (comparator.compare(currentKey, next._1()) == 0) {
            values.add(next._2());
        } else {
            // Key changed: remember the first tuple of the next group and emit this one.
            current = next;
            try {
                return new Tuple2<>(currentKey, converter.call(values));
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        }
    }
    // The while loop exits only when input is exhausted, so clear current.
    if (!input.hasNext()) {
        current = null;
    }
    try {
        return new Tuple2<>(currentKey, converter.call(values));
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
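In this iterator, Function.call() performs the per-group aggregation: tuples with equal keys (per the comparator) are gathered into values, and converter.call(values) folds them into a single output value per key run.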
Code example source: OryxProject/oryx

/**
 * Implementation which splits based solely on time. It will return approximately
 * the earliest 1 - {@link #getTestFraction()} of input, ordered by timestamp, as new
 * training data, and the most recent {@link #getTestFraction()} as test data.
 */
@Override
protected Pair<JavaRDD<String>,JavaRDD<String>> splitNewDataToTrainTest(JavaRDD<String> newData) {
    // Rough approximation; assumes timestamps are fairly evenly distributed
    StatCounter maxMin = newData.mapToDouble(
        line -> MLFunctions.TO_TIMESTAMP_FN.call(line).doubleValue()).stats();
    long minTime = (long) maxMin.min();
    long maxTime = (long) maxMin.max();
    log.info("New data timestamp range: {} - {}", minTime, maxTime);
    long approxTestTrainBoundary = (long) (maxTime - getTestFraction() * (maxTime - minTime));
    log.info("Splitting at timestamp {}", approxTestTrainBoundary);
    JavaRDD<String> newTrainData = newData.filter(
        line -> MLFunctions.TO_TIMESTAMP_FN.call(line) < approxTestTrainBoundary);
    JavaRDD<String> testData = newData.filter(
        line -> MLFunctions.TO_TIMESTAMP_FN.call(line) >= approxTestTrainBoundary);
    return new Pair<>(newTrainData, testData);
}
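TO_TIMESTAMP_FN.call(line) can be used inside these lambdas without a try/catch because the surrounding Spark function interfaces (DoubleFunction for mapToDouble, Function<T, Boolean> for filter) all declare call(...) throws Exception, so the checked exception simply propagates.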
Code example source: OryxProject/oryx

@Test(expected = ArrayIndexOutOfBoundsException.class)
public void testParseBadTimestamp() throws Exception {
    MLFunctions.TO_TIMESTAMP_FN.call("[1,2,3]");
}
Code example source: OryxProject/oryx

@Test(expected = IOException.class)
public void testParseBadLine() throws Exception {
    MLFunctions.PARSE_FN.call("[1,]");
}
Code example source: OryxProject/oryx

@Test
public void testSumWithNaN() throws Exception {
    OryxTest.assertEquals(1.0, MLFunctions.SUM_WITH_NAN.call(Arrays.asList(1.0)).doubleValue());
    OryxTest.assertEquals(6.0, MLFunctions.SUM_WITH_NAN.call(Arrays.asList(1.0, 2.0, 3.0)).doubleValue());
    OryxTest.assertEquals(3.0, MLFunctions.SUM_WITH_NAN.call(Arrays.asList(1.0, Double.NaN, 3.0)).doubleValue());
    assertNaN(MLFunctions.SUM_WITH_NAN.call(Arrays.asList(1.0, 2.0, Double.NaN)));
    assertNaN(MLFunctions.SUM_WITH_NAN.call(Arrays.asList(Double.NaN)));
}
Code example source: OryxProject/oryx

@Test
public void testToTimestamp() throws Exception {
    assertEquals(123L, MLFunctions.TO_TIMESTAMP_FN.call("a,b,c,123").longValue());
    assertEquals(123L, MLFunctions.TO_TIMESTAMP_FN.call("a,b,c,123,").longValue());
    assertEquals(123L, MLFunctions.TO_TIMESTAMP_FN.call("[\"a\",\"b\",\"c\",123]").longValue());
    assertEquals(123L, MLFunctions.TO_TIMESTAMP_FN.call("[\"a\",\"b\",\"c\",123,\"d\"]").longValue());
}
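As these tests show, TO_TIMESTAMP_FN accepts both CSV lines and JSON-array lines and reads the timestamp from the fourth field in either format.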
Code example source: OryxProject/oryx

@Test
public void testParseJSON() throws Exception {
    assertArrayEquals(new String[]{"a", "1", "foo"},
        MLFunctions.PARSE_FN.call("[\"a\",\"1\",\"foo\"]"));
    assertArrayEquals(new String[]{"a", "1", "foo", ""},
        MLFunctions.PARSE_FN.call("[\"a\",\"1\",\"foo\",\"\"]"));
    assertArrayEquals(new String[]{"2.3"},
        MLFunctions.PARSE_FN.call("[\"2.3\"]"));
    assertArrayEquals(new String[]{},
        MLFunctions.PARSE_FN.call("[]"));
}
Code example source: OryxProject/oryx

@Test
public void testParseCSV() throws Exception {
    assertArrayEquals(new String[]{"a", "1", "foo"},
        MLFunctions.PARSE_FN.call("a,1,foo"));
    assertArrayEquals(new String[]{"a", "1", "foo", ""},
        MLFunctions.PARSE_FN.call("a,1,foo,"));
    assertArrayEquals(new String[]{"2.3"},
        MLFunctions.PARSE_FN.call("2.3"));
    // Unlike JSON, where "[]" yields zero tokens, an empty CSV line yields one empty token:
    assertArrayEquals(new String[]{""},
        MLFunctions.PARSE_FN.call(""));
}
Code example source: OryxProject/oryx

JavaPairRDD<Tuple2<String,String>,Double> tuples = sortedValues.mapToPair(line -> {
    try {
        String[] tokens = MLFunctions.PARSE_FN.call(line);
        String user = tokens[0];
        String item = tokens[1];
        // ... (snippet truncated at the source)
Code example source: org.apache.crunch/crunch-spark

@Override
public R call(T t) throws Exception {
    return f.call(t);
}
};
Code example source: apache/crunch

(Same adapter code as the crunch-spark example above.)
Code example source: org.qcri.rheem/rheem-spark

@Override
public OutputType call(InputType v1) throws Exception {
    // Lazily open the wrapped ExtendedFunction once, before the first call.
    if (this.isFirstRun) {
        ((org.qcri.rheem.core.function.ExtendedFunction) this.impl).open(this.executionContext);
        this.isFirstRun = false;
    }
    return this.impl.call(v1);
}
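The isFirstRun flag gives the wrapped function a one-time setup hook: open(executionContext) runs exactly once per function instance before the first element is processed, after which every call() simply delegates to impl.call(v1).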
Code example source: apache/incubator-nemo

@Override
public void onData(final I element) {
    try {
        outputCollector.emit(func.call(element));
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
Code example source: org.apache.beam/beam-runners-spark

/**
 * A utility method that adapts {@link Function} to a {@link FlatMapFunction} with an {@link
 * Iterator} input. This is particularly useful because it lets functions written for map
 * operations be reused in flatMap operations.
 *
 * @param func the {@link Function} to adapt.
 * @param <InputT> the input type.
 * @param <OutputT> the output type.
 * @return a {@link FlatMapFunction} that accepts an {@link Iterator} as an input and applies the
 *     {@link Function} on every element.
 */
public static <InputT, OutputT>
    FlatMapFunction<Iterator<InputT>, OutputT> functionToFlatMapFunction(
        final Function<InputT, OutputT> func) {
  return itr -> {
    final Iterator<OutputT> outputItr =
        Iterators.transform(
            itr,
            t -> {
              try {
                return func.call(t);
              } catch (Exception e) {
                throw new RuntimeException(e);
              }
            });
    return outputItr;
  };
}
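A hedged usage sketch (my own, not from the Beam code base): the adapted function can be handed to JavaRDD.mapPartitions, which expects exactly this FlatMapFunction<Iterator<InputT>, OutputT> shape. Here lines is assumed to be an existing JavaRDD<String>, and lengthFn is an illustrative name:

// Hypothetical usage of the adapter above.
Function<String, Integer> lengthFn = s -> s.length();
JavaRDD<Integer> lengths = lines.mapPartitions(functionToFlatMapFunction(lengthFn));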
Code example source: net.sansa-stack/sansa-rdf-spark

public static <I, O> org.apache.spark.api.java.function.Function<I, O> wrap(
        org.apache.spark.api.java.function.Function<I, O> fn) {
    JavaKryoSerializationWrapper<org.apache.spark.api.java.function.Function<I, O>> wrapper =
            new JavaKryoSerializationWrapper<>(fn);
    org.apache.spark.api.java.function.Function<I, O> result = i -> wrapper.getValue().call(i);
    return result;
}
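The indirection appears designed to make the function serializable via Kryo: the returned lambda captures only the JavaKryoSerializationWrapper, so it is the wrapper rather than the original fn that is shipped to the executors, where wrapper.getValue().call(i) unwraps and delegates per element.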
Code example source: org.qcri.rheem/rheem-iejoin

public Data call(Tuple2<Long, Input> in) throws Exception {
    return new Data<TypeXPivot, TypeXRef>(in._1(),
            //(TypeXPivot) in._2().getField(getXPivot),
            //(TypeXRef) in._2().getField(getXRef));
            this.getXPivot.call(in._2()), this.getXRef.call(in._2()));
}
}
Code example source: org.apache.kylin/kylin-engine-spark

(Same code as the apache/kylin example above.)
Code example source: org.qcri.rheem/rheem-iejoin

@SuppressWarnings("unchecked")
public Iterator<List2AttributesObjectSkinny<TypeXPivot, TypeXRef>> call(Integer in,
        Iterator<Tuple2<Long, Input>> arg0) throws Exception {
    ArrayList<List2AttributesObjectSkinny<TypeXPivot, TypeXRef>> outList = new ArrayList<>(1);
    ArrayList<Data<TypeXPivot, TypeXRef>> list1 = new ArrayList<>(300000);
    while (arg0.hasNext()) {
        Tuple2<Long, Input> t2 = arg0.next();
        Input t = t2._2().copy();
        list1.add(new Data(t2._1(), getXPivot.call(t), getXRef.call(t))); //(TypeXPivot) t.getField(getXPivot), (TypeXRef) t.getField(getXRef)
    }
    Collections.sort(list1, new Data.Comparator(list1ASC, list1ASCSec));
    Data[] myData = new Data[list1.size()];
    list1.toArray(myData);
    List2AttributesObjectSkinny<TypeXPivot, TypeXRef> lo =
            new List2AttributesObjectSkinny<TypeXPivot, TypeXRef>(myData, in);
    if (!lo.isEmpty()) {
        outList.add(lo);
    }
    return outList.iterator();
}
}
Code example source: uber/marmaray

@Override
public List<ConverterResult<byte[], AvroPayload>> convert(@NonNull final byte[] data) throws Exception {
    GenericRecord genericRecord = this.schemaServiceReader.read(data);
    // Apply each configured update function to the record in sequence.
    for (Function<GenericRecord, GenericRecord> func : this.updateFunctions) {
        genericRecord = func.call(genericRecord);
    }
    return Collections.singletonList(new ConverterResult<>(new AvroPayload(genericRecord, this.fieldsToCache)));
}
}
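Here Function<GenericRecord, GenericRecord> serves as a pluggable record-rewrite hook: each update function in the list is applied in order to the deserialized record before it is wrapped in an AvroPayload.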
Code example source: uber/marmaray

public static <T extends HoodieRecordPayload> JavaRDD<HoodieRecord<T>> combineRecords(
        final JavaRDD<HoodieRecord<T>> records, final Function<HoodieRecord<T>, Object> recordKeyFunc,
        final int parallelism) {
    return records
            .mapToPair(record -> new Tuple2<>(recordKeyFunc.call(record), record))
            .reduceByKey((rec1, rec2) -> {
                @SuppressWarnings("unchecked")
                T reducedData = (T) rec1.getData().preCombine(rec2.getData());
                return new HoodieRecord<T>(rec1.getKey(), reducedData);
            }, parallelism)
            .map(recordTuple -> recordTuple._2());
}
}
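Note the division of labor: Function.call() only extracts the grouping key (recordKeyFunc.call(record)); deciding which of two records with the same key survives is delegated to Hudi's preCombine, which reduceByKey applies pairwise at the requested parallelism.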