-- Session configuration for a sort-merge-bucket (SMB) map join run.
-- Each property is set exactly once below. The original list repeated three of them.
-- Comments stay on their own lines so they never become part of a set value.

-- Run on the MapReduce execution engine.
set hive.execution.engine=mr;

-- Enforce bucketing and sorting when the bucketed, sorted tables are loaded.
-- NOTE(review): newer Hive releases may no longer recognise these two flags - confirm for the target version.
set hive.enforce.bucketing=true;
set hive.enforce.sorting=true;

-- Enable automatic map-join conversion, bucket map join and its sorted-merge variant.
set hive.auto.convert.join=true;
set hive.optimize.bucketmapjoin=true;
set hive.optimize.bucketmapjoin.sortedmerge=true;

-- Enable automatic conversion to a sort-merge join.
-- NOTE(review): the noconditionaltask flag presumably skips the conditional fallback task - confirm for the target version.
set hive.auto.convert.sortmerge.join=true;
set hive.auto.convert.sortmerge.join.noconditionaltask=true;
-- Remove any tables left over from a previous run so the script can be re-run.
-- IF EXISTS makes the drop a no-op when a table is absent, regardless of
-- how hive.exec.drop.ignorenonexistent is configured.
DROP TABLE IF EXISTS key_value_large;
DROP TABLE IF EXISTS key_value_small;
-- Larger join input: partitioned by ds, bucketed and sorted ascending on key
-- into 8 buckets, stored as pipe-delimited text.
CREATE TABLE key_value_large (
    key   INT,
    value STRING
)
PARTITIONED BY (ds STRING)
CLUSTERED BY (key)
SORTED BY (key ASC)
INTO 8 BUCKETS
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '|'
STORED AS TEXTFILE;
-- Smaller join input: same columns, partitioning and sort key as
-- key_value_large, but bucketed into 4 buckets instead of 8.
CREATE TABLE key_value_small (
    key   INT,
    value STRING
)
PARTITIONED BY (ds STRING)
CLUSTERED BY (key)
SORTED BY (key ASC)
INTO 4 BUCKETS
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '|'
STORED AS TEXTFILE;
-- Load the ds='2008-04-08' partition of each bucketed table from its *_src table.
-- The *_src tables are not created in this script and must already exist.
INSERT INTO TABLE key_value_large PARTITION (ds = '2008-04-08')
SELECT
    key,
    value
FROM key_value_large_src;

INSERT INTO TABLE key_value_small PARTITION (ds = '2008-04-08')
SELECT
    key,
    value
FROM key_value_small_src;
-- Print the extended plan for the equi-join on the bucketing key.
EXPLAIN EXTENDED
SELECT COUNT(*)
FROM key_value_large a
JOIN key_value_small b
    ON a.key = b.key;

-- Run the same join and return the matching row count.
SELECT COUNT(*)
FROM key_value_large a
JOIN key_value_small b
    ON a.key = b.key;
2条答案
按热度按时间q5lcpyga1#
下面是启用 SMB map join（sort-merge-bucket map join）所需的提示（配置项）：
使用以上提示之后，还必须选择符合 SMB map join 条件的表（两个表应按相同的列分桶，桶的数量相同，并且必须使用分桶列来联接这两个表）。
下面的 explain 显示了该联接查询的输出：
正如您所看到的，输出清楚地表明它将执行 SMB map join。
o2rvlv0m2#
您可以在查询中使用 explain extended。到目前为止，我只能在 MapReduce 引擎下生成 SMB map join。当 Hive 执行 SMB map join 时，您可以在 explain 输出的 stage plans 部分看到 “Sorted Merge Bucket Map Join Operator”。
以下是在我的环境中使用 MapReduce 生成 SMB map join 的代码段：
希望能帮上忙。