数据块使用spark SQL解析JSON

jhkqcmku  于 2023-05-19  发布在  Spark
关注(0)|答案(1)|浏览(200)

我正在尝试在数据库中使用spark SQL解析JSON。我对SQL比Spark更熟悉。有什么建议可以解析这个JSON吗?

{
    "messageTimestamp": "2023-05-05T10:40:44.692-04:00",
    "siteReference": "FL99,FL5,FL55",
    "updateCount": 2,
    "updates": [
        {
            "eventTimestamp": "2023-05-05T10:40:44.692-04:00",
            "spotReference": "AA18_FL5",
            "spotInfo": {
                "vehicleSpot": {
                    "familyType": "VEHICLE_SPOT",
                    "type": "DockDoor",
                    "externalReference": "AA18_FL5",
                    "state": "Busy",
                    "displayState": "Busy",
                    "fields": {
                        "c3FieldStrings": [
                            {
                                "externalReference": "4095",
                                "name": "Blocking Spot"
                            },
                            {
                                "externalReference": "Disabled",
                                "name": "Disabled",
                                "value": "false"
                            },
                            {
                                "externalReference": "WorkflowVehicleSpot",
                                "name": "Spot",
                                "value": "AA18_FL5"
                            },
                            {
                                "externalReference": "WorkflowVMT",
                                "name": "WorkflowVMT"
                            },
                            {
                                "externalReference": "SPOT_ExternalReference",
                                "name": "External Reference",
                                "value": "AA18_FL5"
                            },
                            {
                                "externalReference": "4094",
                                "name": "Blocked By Spot"
                            },
                            {
                                "externalReference": "SPOT_SubWarehouse",
                                "name": "Sub Warehouse",
                                "value": "FL5_RECV"
                            },
                            {
                                "externalReference": "SPOT_Site",
                                "name": "Site",
                                "value": "FL99,FL5,FL55"
                            },
                            {
                                "externalReference": "OriginRestriction",
                                "name": "Origin Restriction",
                                "value": "FL5"
                            },
                            {
                                "externalReference": "ZonePathInfo",
                                "name": "Zone Path Info",
                                "value": "AA18"
                            },
                            {
                                "externalReference": "UnderReviewedBy",
                                "name": "Under Reviewed By"
                            }
                        ],
                        "c3FieldIntegers": [
                            {
                                "externalReference": "Workflow",
                                "name": "Workflow",
                                "value": 3637
                            },
                            {
                                "externalReference": "SPOT_VehicleZoneType",
                                "name": "Vehicle Zone Type"
                            }
                        ],
                        "c3FieldDecimals": [],
                        "c3FieldDateTimes": [
                            {
                                "externalReference": "DisabledTimestamp",
                                "name": "Disabled Timestamp"
                            },
                            {
                                "externalReference": "LockedTimestamp",
                                "name": "Locked Timestamp",
                                "value": "2023-05-03T08:24:03.360-04:00"
                            },
                            {
                                "externalReference": "OutgoingTimestamp",
                                "name": "Outgoing Timestamp",
                                "value": "2023-05-03T11:20:48.227-04:00"
                            },
                            {
                                "externalReference": "BusyTimestamp",
                                "name": "Busy Timestamp",
                                "value": "2023-05-05T10:40:43.887-04:00"
                            },
                            {
                                "externalReference": "ConfirmedDate",
                                "name": "Confirmed Date",
                                "value": "2023-05-05T10:40:43.953-04:00"
                            }
                        ]
                    }
                },
                "vehicleData": {
                    "familyType": "VEHICLE_DATA",
                    "type": "Vehicle",
                    "externalReference": "ONEY-TLLU4069090",
                    "state": "InUse",
                    "displayState": "Waiting for Errand",
                    "fields": {
                        "c3FieldStrings": [
                            {
                                "externalReference": "Vendor",
                                "name": "Vendor"
                            },
                            {
                                "externalReference": "WorkflowVehicleSpot",
                                "name": "WorkflowVehicleSpot",
                                "value": "AA18_FL5"
                            },
                            {
                                "externalReference": "WorkflowWorkerExecution",
                                "name": "WorkflowWorkerExecution"
                            },
                            {
                                "externalReference": "FaultSummary",
                                "name": "Fault Summary"
                            },
                            {
                                "externalReference": "WorkflowReservation",
                                "name": "WorkflowReservation",
                                "value": "13227269"
                            },
                            {
                                "externalReference": "ChassisNumber",
                                "name": "Chassis #"
                            }
                        ],
                        "c3FieldIntegers": [
                            {
                                "externalReference": "VehicleZoneType",
                                "name": "Vehicle Zone Type"
                            },
                            {
                                "externalReference": "Workflow",
                                "name": "Workflow",
                                "value": 8203
                            },
                            {
                                "externalReference": "FTX",
                                "name": "FTX",
                                "value": 7
                            },
                            {
                                "externalReference": "FaultCode",
                                "name": "Fault Code"
                            }
                        ],
                        "c3FieldDecimals": [],
                        "c3FieldDateTimes": [
                            {
                                "externalReference": "ArrivalTime",
                                "name": "Arrival Time",
                                "value": "2023-04-27T10:55:42.800-04:00"
                            },
                            {
                                "externalReference": "DepartureTime",
                                "name": "Departure Time"
                            },
                            {
                                "externalReference": "PMDate",
                                "name": "PM Date"
                            },
                            {
                                "externalReference": "CreatedTimestamp",
                                "name": "Created Timestamp",
                                "value": "2023-04-27T10:55:06.750-04:00"
                            },
                            {
                                "externalReference": "AnnualInspectionDate",
                                "name": "Annual Inspection Date"
                            }
                        ]
                    }
                },
                "vehicleExecution": {
                    "familyType": "VEHICLE_EXECUTION",
                    "type": "Delivery",
                    "externalReference": "1667328",
                    "state": "WaitingforErrand",
                    "displayState": "Waiting for Errand",
                    "fields": {
                        "c3FieldStrings": [
                            {
                                "externalReference": "WorkflowVehicleSpot",
                                "name": "Spot",
                                "value": "AA18_FL5"
                            },
                            {
                                "externalReference": "CreatedAtGate",
                                "name": "Created at Gate",
                                "value": "false"
                            },
                            {
                                "externalReference": "WorkflowVMT",
                                "name": "WorkflowVMT"
                            },
                            {
                                "externalReference": "ExternalReference",
                                "name": "External Reference",
                                "value": "1667328"
                            }
                        ],
                        "c3FieldIntegers": [
                            {
                                "externalReference": "Workflow",
                                "name": "Workflow",
                                "value": 173227
                            },
                            {
                                "externalReference": "4112",
                                "name": "ErrandSequence"
                            },
                            {
                                "externalReference": "ErrandType",
                                "name": "Errand Type"
                            }
                        ],
                        "c3FieldDecimals": [],
                        "c3FieldDateTimes": [
                            {
                                "externalReference": "Ready for DepartureTimestamp",
                                "name": "Ready for Departure Timestamp"
                            },
                            {
                                "externalReference": "Doing ErrandTimestamp",
                                "name": "Doing Errand Timestamp"
                            },
                            {
                                "externalReference": "Waiting for ErrandTimestamp",
                                "name": "Waiting for Errand Timestamp",
                                "value": "2023-04-27T11:00:51.233-04:00"
                            },
                            {
                                "externalReference": "DepartureTime",
                                "name": "Departure Time"
                            }
                        ]
                    }
                }
            }
        },
        {
            "eventTimestamp": "2023-05-05T10:40:44.692-04:00",
            "spotReference": "FL99_A-025",
            "spotInfo": {
                "vehicleSpot": {
                    "familyType": "VEHICLE_SPOT",
                    "type": "Parking",
                    "externalReference": "FL99_A-025",
                    "state": "Available",
                    "displayState": "Available",
                    "fields": {
                        "c3FieldStrings": [
                            {
                                "externalReference": "4095",
                                "name": "Blocking Spot"
                            },
                            {
                                "externalReference": "Disabled",
                                "name": "Disabled",
                                "value": "false"
                            },
                            {
                                "externalReference": "WorkflowVehicleSpot",
                                "name": "Spot",
                                "value": "FL99_A-025"
                            }
                        ],
                        "c3FieldIntegers": [
                            {
                                "externalReference": "Workflow",
                                "name": "Workflow",
                                "value": 3723
                            },
                            {
                                "externalReference": "SPOT_VehicleZoneType",
                                "name": "Vehicle Zone Type"
                            }
                        ],
                        "c3FieldDecimals": [],
                        "c3FieldDateTimes": [
                            {
                                "externalReference": "DisabledTimestamp",
                                "name": "Disabled Timestamp"
                            },
                            {
                                "externalReference": "LockedTimestamp",
                                "name": "Locked Timestamp"
                            },
                            {
                                "externalReference": "OutgoingTimestamp",
                                "name": "Outgoing Timestamp",
                                "value": "2023-05-03T17:05:23.363-04:00"
                            }
                        ]
                    }
                }
            }
        }
    ]
}

请看附件

vshtjzan

vshtjzan1#

有几种方法可以实现这一点:

  • 仅使用SQL实现此目的的最简单方法是使用COPY INTO SQL命令(doctutorial),该命令可以从云存储读取文件,根据指定的文件格式解析文件,然后可以应用转换。实际的实现将取决于您是希望摄取所有数据还是仅提取特定字段等。此命令提供增量数据摄取,而无需重新摄取已处理的数据。就像这样:
COPY INTO table_name
 FROM 'abfss://container@storage.dfs.core.windows.net/path'
 FILEFORMAT = JSON

相关问题