我运行了带有和不带smb join的查询,得到了不同的结果。请帮忙解释一下。
SET hive.enforce.bucketing=true;
create table dbaproceduresbuckets (
owner string ,
object_name string ,
procedure_name string ,
object_id double ,
subprogram_id double ,
overload string ,
object_type string ,
aggregate string ,
pipelined string ,
impltypeowner string ,
impltypename string ,
parallel string ,
interface string ,
deterministic string ,
authid string )
CLUSTERED BY (object_id) SORTED BY (OBJECT_ID ASC) INTO 32 BUCKETS;
CREATE TABLE dbaobjectsbuckets1(
owner string,
object_name string,
subobject_name string,
object_id double,
data_object_id double,
object_type string,
created string,
last_ddl_time string,
timestamp string,
status string,
temporary string,
generated string,
secondary string,
namespace double,
edition_name string) CLUSTERED BY (object_id) SORTED BY (OBJECT_ID ASC) INTO 32 BUCKETS;
****load the table;
0: jdbc:hive2://:10000>从dbaobjectsbuckets1 a,dbaproceduresbuckets b0中选择count(*):jdbc:hive2://xx:10000>其中a.object\u id=b.object\u id;信息:阶段2的hadoop作业信息:Map器数量:3;减速器数量:1信息:2016-06-13 15:56:00381第二阶段map=0%,减速=0%信息:2016-06-13 15:56:55818第二阶段map=1%,减速=0%,累计cpu 122.6秒信息:2016-06-13 15:57:47124第二阶段map=7%,减速=0%,累计cpu 326.86秒。。。。。。。。。。信息:2016-06-13 16:05:01246 stage-2 map=100%,reduce=100%,累计cpu 867.1秒信息:mapreduce累计cpu总时间:14分27秒100毫秒信息:结束作业=job1464280256859_0146+
5条答案
按热度按时间nsc4cvqm1#
++| | u c0 |+
vsikbqxv2#
++| 2 |+------+--+
????? 我的问题是,为什么我使用smb加入时只有2个??????应该是54876。
谢谢!
ef1yzkbh3#
在将数据插入排序表时使用sort by子句
或
将数据插入排序表之前
3qpi33ja4#
------+|c0+
edqdpe6u5#
-+| 54876 |+--------+--+
0: jdbc:hive2://xx:10000>其中a.object\u id=b.object\u id;
|排序合并桶Map联接运算符| |条件Map:| |内部联接0到1 | |键:| | 0对象| id(类型:double)| | 1对象| id(类型:double)
信息:结束作业=job1464280256859_0150+