Parsing JSON in Hive

q35jwt9p posted on 2021-05-29 in Hadoop

My JSON file is as follows:

{
    "total": 3666,
    "offset": 1,
    "len": 2,
    "workflows": [
        {
            "appName": "xxxx1",
            "externalId": null,
            "conf": null,
            "run": 0,
            "acl": null,
            "appPath": null,
            "parentId": null,
            "lastModTime": "Tue, 08 Aug 2017 22:15:11 GMT",
            "consoleUrl": "http://**************:11000/oozie?job=0000130-170807105041043-oozie-oozi-W",
            "createdTime": "Tue, 08 Aug 2017 22:02:13 GMT",
            "startTime": "Tue, 08 Aug 2017 22:02:13 GMT",
            "toString": "Workflow id[0000130-170807105041043-oozie-oozi-W] status[SUCCEEDED]",
            "id": "0000130-170807105041043-oozie-oozi-W",
            "endTime": "Tue, 08 Aug 2017 22:15:11 GMT",
            "user": "user1",
            "actions": [],
            "status": "SUCCEEDED",
            "group": null
        },
        {
            "appName": "xxxx2",
            "externalId": null,
            "conf": null,
            "run": 0,
            "acl": null,
            "appPath": null,
            "parentId": null,
            "lastModTime": "Mon, 07 Aug 2017 20:16:20 GMT",
            "consoleUrl": "http://**************:11000/oozie?job=0000031-170807105041043-oozie-oozi-W",
            "createdTime": "Mon, 07 Aug 2017 20:15:02 GMT",
            "startTime": "Mon, 07 Aug 2017 20:15:02 GMT",
            "toString": "Workflow id[0000031-170807105041043-oozie-oozi-W] status[SUCCEEDED]",
            "id": "0000031-170807105041043-oozie-oozi-W",
            "endTime": "Mon, 07 Aug 2017 20:16:20 GMT",
            "user": "user1",
            "actions": [],
            "status": "SUCCEEDED",
            "group": null
        }
    ]
}
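
For context, the queries below read this document from a table leap_frog_audit.oozie_json_file that has a single string column json_data. A minimal sketch of such a staging table might look like the following; STORED AS TEXTFILE and the assumption that each JSON document has been collapsed onto a single line (so it lands in one row) are mine, not part of the question.

-- Hypothetical staging table implied by the queries below.
-- Each row is assumed to hold one complete JSON document as a string.
CREATE TABLE leap_frog_audit.oozie_json_file (
    json_data STRING
)
STORED AS TEXTFILE;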

I am trying to parse this and load it into a Hive table. I have tried the following two approaches.

Approach 1:

select 
get_json_object(json_data,'$.workflows[5].id') as id,
get_json_object(json_data,'$.workflows[5].appName') as app_name,
get_json_object(json_data,'$.workflows[5].createdTime') as created_time,
get_json_object(json_data,'$.workflows[5].startTime') as start_time,
get_json_object(json_data,'$.workflows[5].endTime') as end_time,
get_json_object(json_data,'$.workflows[5].user') as user,
get_json_object(json_data,'$.workflows[5].status') as status
from 
leap_frog_audit.oozie_json_file

This gives me the following error:
Only a single expression in the SELECT clause is supported with UDTF's

Approach 2:

CREATE TABLE default.oozie_metrics AS
SELECT m.col AS id,
       k.col AS appName,
       c.col AS createdTime,
       s.col AS start_time,
       e.col AS end_time,
       u.col AS user_name,
       st.col AS status
FROM leap_frog_audit.oozie_json_file
     LATERAL VIEW explode(split(regexp_replace(get_json_object(json_data,'$.workflows[*].id'),'\\[\\"|\\"\\]',''),'\\"\\,\\"')) m
     LATERAL VIEW explode(split(regexp_replace(get_json_object(json_data,'$.workflows[*].appName'),'\\[\\"|\\"\\]',''),'\\"\\,\\"')) k
     LATERAL VIEW explode(split(regexp_replace(get_json_object(json_data,'$.workflows[*].createdTime'),'\\[\\"|\\"\\]',''),'\\"\\,\\"')) c
     LATERAL VIEW explode(split(regexp_replace(get_json_object(json_data,'$.workflows[*].startTime'),'\\[\\"|\\"\\]',''),'\\"\\,\\"')) s
     LATERAL VIEW explode(split(regexp_replace(get_json_object(json_data,'$.workflows[*].endTime'),'\\[\\"|\\"\\]',''),'\\"\\,\\"')) e
     LATERAL VIEW explode(split(regexp_replace(get_json_object(json_data,'$.workflows[*].user'),'\\[\\"|\\"\\]',''),'\\"\\,\\"')) u
     LATERAL VIEW explode(split(regexp_replace(get_json_object(json_data,'$.workflows[*].status'),'\\[\\"|\\"\\]',''),'\\"\\,\\"')) st

This takes a very long time to complete. Is there an efficient way to get the output below?

+--------------------------------------+----------+-------------------------------+-------------------------------+-------------------------------+-------+-----------+
| id                                   | app_name | created_time                  | start_time                    | end_time                      | user  | status    |
+--------------------------------------+----------+-------------------------------+-------------------------------+-------------------------------+-------+-----------+
| 0000130-170807105041043-oozie-oozi-W | xxxx1    | Tue, 08 Aug 2017 22:02:13 GMT | Tue, 08 Aug 2017 22:02:13 GMT | Tue, 08 Aug 2017 22:15:11 GMT | user1 | SUCCEEDED |
+--------------------------------------+----------+-------------------------------+-------------------------------+-------------------------------+-------+-----------+
| 0000031-170807105041043-oozie-oozi-W | xxxx2    | Mon, 07 Aug 2017 20:15:02 GMT | Mon, 07 Aug 2017 20:15:02 GMT | Mon, 07 Aug 2017 20:16:20 GMT | user1 | SUCCEEDED |
+--------------------------------------+----------+-------------------------------+-------------------------------+-------------------------------+-------+-----------+

Answer 1 (kmpatx3s):

select  jt.*
from    oozie_json_file ojf
        lateral view  explode (split(substr(get_json_object(ojf.json_data,'$.workflows[*]'),2),'(?<=\\}),(?=\\{)')) e as app
        lateral view  json_tuple (e.app,'id','appName','createdTime','startTime','endTime','user','status') jt as `id`,`appName`,`createdTime`,`startTime`,`endTime`,`user`,`status`
;
+--------------------------------------+---------+-------------------------------+-------------------------------+-------------------------------+-------+-----------+
| id                                   | appname | createdtime                   | starttime                     | endtime                       | user  | status    |
+--------------------------------------+---------+-------------------------------+-------------------------------+-------------------------------+-------+-----------+
| 0000130-170807105041043-oozie-oozi-W | xxxx1   | Tue, 08 Aug 2017 22:02:13 GMT | Tue, 08 Aug 2017 22:02:13 GMT | Tue, 08 Aug 2017 22:15:11 GMT | user1 | SUCCEEDED |
+--------------------------------------+---------+-------------------------------+-------------------------------+-------------------------------+-------+-----------+
| 0000031-170807105041043-oozie-oozi-W | xxxx2   | Mon, 07 Aug 2017 20:15:02 GMT | Mon, 07 Aug 2017 20:15:02 GMT | Mon, 07 Aug 2017 20:16:20 GMT | user1 | SUCCEEDED |
+--------------------------------------+---------+-------------------------------+-------------------------------+-------------------------------+-------+-----------+
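
How this works: substr(..., 2) drops the leading '[' of the workflows array, the split on '(?<=\\}),(?=\\{)' breaks the array string into one JSON object per row at the '},{' boundaries (the lookbehind/lookahead keep the braces themselves), and json_tuple then pulls all seven fields out of each object in a single pass, so there is no cross join of multiple LATERAL VIEWs as in approach 2. If the result should also be persisted as in approach 2, the same query can presumably be wrapped in a CTAS; the following is an untested sketch that reuses the target table name default.oozie_metrics from the question:

CREATE TABLE default.oozie_metrics AS
select  jt.*
from    oozie_json_file ojf
        lateral view  explode (split(substr(get_json_object(ojf.json_data,'$.workflows[*]'),2),'(?<=\\}),(?=\\{)')) e as app
        lateral view  json_tuple (e.app,'id','appName','createdTime','startTime','endTime','user','status') jt as `id`,`appName`,`createdTime`,`startTime`,`endTime`,`user`,`status`
;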
