我正在尝试在数据库中使用spark SQL解析JSON。我对SQL比Spark更熟悉。有什么建议可以解析这个JSON吗?
{
"messageTimestamp": "2023-05-05T10:40:44.692-04:00",
"siteReference": "FL99,FL5,FL55",
"updateCount": 2,
"updates": [
{
"eventTimestamp": "2023-05-05T10:40:44.692-04:00",
"spotReference": "AA18_FL5",
"spotInfo": {
"vehicleSpot": {
"familyType": "VEHICLE_SPOT",
"type": "DockDoor",
"externalReference": "AA18_FL5",
"state": "Busy",
"displayState": "Busy",
"fields": {
"c3FieldStrings": [
{
"externalReference": "4095",
"name": "Blocking Spot"
},
{
"externalReference": "Disabled",
"name": "Disabled",
"value": "false"
},
{
"externalReference": "WorkflowVehicleSpot",
"name": "Spot",
"value": "AA18_FL5"
},
{
"externalReference": "WorkflowVMT",
"name": "WorkflowVMT"
},
{
"externalReference": "SPOT_ExternalReference",
"name": "External Reference",
"value": "AA18_FL5"
},
{
"externalReference": "4094",
"name": "Blocked By Spot"
},
{
"externalReference": "SPOT_SubWarehouse",
"name": "Sub Warehouse",
"value": "FL5_RECV"
},
{
"externalReference": "SPOT_Site",
"name": "Site",
"value": "FL99,FL5,FL55"
},
{
"externalReference": "OriginRestriction",
"name": "Origin Restriction",
"value": "FL5"
},
{
"externalReference": "ZonePathInfo",
"name": "Zone Path Info",
"value": "AA18"
},
{
"externalReference": "UnderReviewedBy",
"name": "Under Reviewed By"
}
],
"c3FieldIntegers": [
{
"externalReference": "Workflow",
"name": "Workflow",
"value": 3637
},
{
"externalReference": "SPOT_VehicleZoneType",
"name": "Vehicle Zone Type"
}
],
"c3FieldDecimals": [],
"c3FieldDateTimes": [
{
"externalReference": "DisabledTimestamp",
"name": "Disabled Timestamp"
},
{
"externalReference": "LockedTimestamp",
"name": "Locked Timestamp",
"value": "2023-05-03T08:24:03.360-04:00"
},
{
"externalReference": "OutgoingTimestamp",
"name": "Outgoing Timestamp",
"value": "2023-05-03T11:20:48.227-04:00"
},
{
"externalReference": "BusyTimestamp",
"name": "Busy Timestamp",
"value": "2023-05-05T10:40:43.887-04:00"
},
{
"externalReference": "ConfirmedDate",
"name": "Confirmed Date",
"value": "2023-05-05T10:40:43.953-04:00"
}
]
}
},
"vehicleData": {
"familyType": "VEHICLE_DATA",
"type": "Vehicle",
"externalReference": "ONEY-TLLU4069090",
"state": "InUse",
"displayState": "Waiting for Errand",
"fields": {
"c3FieldStrings": [
{
"externalReference": "Vendor",
"name": "Vendor"
},
{
"externalReference": "WorkflowVehicleSpot",
"name": "WorkflowVehicleSpot",
"value": "AA18_FL5"
},
{
"externalReference": "WorkflowWorkerExecution",
"name": "WorkflowWorkerExecution"
},
{
"externalReference": "FaultSummary",
"name": "Fault Summary"
},
{
"externalReference": "WorkflowReservation",
"name": "WorkflowReservation",
"value": "13227269"
},
{
"externalReference": "ChassisNumber",
"name": "Chassis #"
}
],
"c3FieldIntegers": [
{
"externalReference": "VehicleZoneType",
"name": "Vehicle Zone Type"
},
{
"externalReference": "Workflow",
"name": "Workflow",
"value": 8203
},
{
"externalReference": "FTX",
"name": "FTX",
"value": 7
},
{
"externalReference": "FaultCode",
"name": "Fault Code"
}
],
"c3FieldDecimals": [],
"c3FieldDateTimes": [
{
"externalReference": "ArrivalTime",
"name": "Arrival Time",
"value": "2023-04-27T10:55:42.800-04:00"
},
{
"externalReference": "DepartureTime",
"name": "Departure Time"
},
{
"externalReference": "PMDate",
"name": "PM Date"
},
{
"externalReference": "CreatedTimestamp",
"name": "Created Timestamp",
"value": "2023-04-27T10:55:06.750-04:00"
},
{
"externalReference": "AnnualInspectionDate",
"name": "Annual Inspection Date"
}
]
}
},
"vehicleExecution": {
"familyType": "VEHICLE_EXECUTION",
"type": "Delivery",
"externalReference": "1667328",
"state": "WaitingforErrand",
"displayState": "Waiting for Errand",
"fields": {
"c3FieldStrings": [
{
"externalReference": "WorkflowVehicleSpot",
"name": "Spot",
"value": "AA18_FL5"
},
{
"externalReference": "CreatedAtGate",
"name": "Created at Gate",
"value": "false"
},
{
"externalReference": "WorkflowVMT",
"name": "WorkflowVMT"
},
{
"externalReference": "ExternalReference",
"name": "External Reference",
"value": "1667328"
}
],
"c3FieldIntegers": [
{
"externalReference": "Workflow",
"name": "Workflow",
"value": 173227
},
{
"externalReference": "4112",
"name": "ErrandSequence"
},
{
"externalReference": "ErrandType",
"name": "Errand Type"
}
],
"c3FieldDecimals": [],
"c3FieldDateTimes": [
{
"externalReference": "Ready for DepartureTimestamp",
"name": "Ready for Departure Timestamp"
},
{
"externalReference": "Doing ErrandTimestamp",
"name": "Doing Errand Timestamp"
},
{
"externalReference": "Waiting for ErrandTimestamp",
"name": "Waiting for Errand Timestamp",
"value": "2023-04-27T11:00:51.233-04:00"
},
{
"externalReference": "DepartureTime",
"name": "Departure Time"
}
]
}
}
}
},
{
"eventTimestamp": "2023-05-05T10:40:44.692-04:00",
"spotReference": "FL99_A-025",
"spotInfo": {
"vehicleSpot": {
"familyType": "VEHICLE_SPOT",
"type": "Parking",
"externalReference": "FL99_A-025",
"state": "Available",
"displayState": "Available",
"fields": {
"c3FieldStrings": [
{
"externalReference": "4095",
"name": "Blocking Spot"
},
{
"externalReference": "Disabled",
"name": "Disabled",
"value": "false"
},
{
"externalReference": "WorkflowVehicleSpot",
"name": "Spot",
"value": "FL99_A-025"
}
],
"c3FieldIntegers": [
{
"externalReference": "Workflow",
"name": "Workflow",
"value": 3723
},
{
"externalReference": "SPOT_VehicleZoneType",
"name": "Vehicle Zone Type"
}
],
"c3FieldDecimals": [],
"c3FieldDateTimes": [
{
"externalReference": "DisabledTimestamp",
"name": "Disabled Timestamp"
},
{
"externalReference": "LockedTimestamp",
"name": "Locked Timestamp"
},
{
"externalReference": "OutgoingTimestamp",
"name": "Outgoing Timestamp",
"value": "2023-05-03T17:05:23.363-04:00"
}
]
}
}
}
}
]
}
请看附件
1条答案
按热度按时间vshtjzan1#
有几种方法可以实现这一点:
COPY INTO
SQL命令(doc,tutorial),该命令可以从云存储读取文件,根据指定的文件格式解析文件,然后可以应用转换。实际的实现将取决于您是希望摄取所有数据还是仅提取特定字段等。此命令提供增量数据摄取,而无需重新摄取已处理的数据。就像这样: