import org.apache.spark.sql.functions.udf
// Extract the schema id stored in bytes 1-4 of the message and decode it as a
// big-endian Int (ByteBuffer's default byte order).
// Yields None (a null cell in the resulting column) when the message is null or
// shorter than 5 bytes, instead of failing the task with a NullPointerException
// or BufferUnderflowException.
// NOTE(review): the layout presumably follows the Confluent wire format
// (magic byte at index 0, schema id at indices 1-4) — confirm against the producer.
val extractSchemaId = udf((message: Array[Byte]) => {
  // Option(...) maps a null payload to None; the length guard rejects truncated
  // payloads. wrap(array, offset, length) reads in place without copying a slice.
  Option(message)
    .filter(_.length >= 5)
    .map(bytes => java.nio.ByteBuffer.wrap(bytes, 1, 4).getInt())
})
// Derive a "schema_id" column by applying the UDF to the "value" column,
// which is assumed to carry the raw Avro message bytes.
df.withColumn(
  "schema_id",
  extractSchemaId(df.col("value"))
)
1条答案
按热度按时间yyhrrdl81#
你可以这样做: