对于hive的复杂结构数据类型,如何使用where子句编写查询

2vuwiymt  于 2021-06-26  发布在  Hive
关注(0)|答案(1)|浏览(466)

我有下面的配置单元表和复杂的数据类型struct。你能帮我写一个特定城市的带有where子句的配置单元查询吗?

CREATE EXTERNAL TABLE user_t (
 name      STRING,
 id        BIGINT,
 isFTE     BOOLEAN,
 role      VARCHAR(64),
 salary    DECIMAL(8,2),
 phones    ARRAY<INT>,
 deductions MAP<STRING, FLOAT>,
 address   ARRAY<STRUCT<street:STRING, city:STRING, state:STRING, zip:INT>>,
 others    UNIONTYPE<FLOAT,BOOLEAN,STRING>,
 misc      BINARY
 )

我可以在select子句中使用struct数据类型,但不能在where子句中使用相同的数据类型。
工作:

select address.city from user_t;

不工作:

select address.city from user_t where address.city = 'XYZ'

文档说明它在使用groupby或where子句时有局限性,并给出了解决方案。但我不太明白。
链接:文档
请建议。谢谢您。

gorkyyrv

gorkyyrv1#

演示

create table user_t 
(
    id        bigint
   ,address   array<struct<street:string, city:string, state:string, zip:int>>
)
;

insert into user_t 

    select  1
           ,array
            (
                named_struct('street','street_1','city','city_1','state','state_1','zip',11111)
               ,named_struct('street','street_2','city','city_1','state','state_1','zip',11111)
               ,named_struct('street','street_3','city','city_3','state','state_3','zip',33333)
            )

    union all

    select  2
           ,array
            (
                named_struct('street','street_4','city','city_4','state','state_4','zip',44444)
               ,named_struct('street','street_5','city','city_5','state','state_5','zip',55555)
            )
;

选项1:分解

select  u.id
       ,a.*

from    user_t as u
        lateral view explode(address) a as details

where   details.city = 'city_1'
;
+----+---------------------------------------------------------------------+
| id |                               details                               |
+----+---------------------------------------------------------------------+
|  1 | {"street":"street_1","city":"city_1","state":"state_1","zip":11111} |
|  1 | {"street":"street_2","city":"city_1","state":"state_1","zip":11111} |
+----+---------------------------------------------------------------------+

选项2:内联

select  u.id
       ,a.*

from    user_t as u
        lateral view inline(address) a

where   a.city = 'city_1'
;
+----+----------+--------+---------+-------+
| id |  street  |  city  |  state  |  zip  |
+----+----------+--------+---------+-------+
|  1 | street_1 | city_1 | state_1 | 11111 |
|  1 | street_2 | city_1 | state_1 | 11111 |
+----+----------+--------+---------+-------+

选项3:自连接

select  u.*

from            user_t as u

        join   (select  distinct
                        u.id

                from    user_t as u
                        lateral view inline(address) a

                where   a.city = 'city_1'
                ) as u2

        on      u2.id = u.id
;
+----+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| id |                                                                                                    address                                                                                                    |
+----+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|  1 | [{"street":"street_1","city":"city_1","state":"state_1","zip":11111},{"street":"street_2","city":"city_1","state":"state_1","zip":11111},{"street":"street_3","city":"city_3","state":"state_3","zip":33333}] |
+----+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+

相关问题