-- External (schema-on-read) table over the raw comma-delimited log files.
-- Dropping this table leaves the files at location1 untouched.
CREATE EXTERNAL TABLE default.stack (
    col1 string,
    col2 string,
    col3 string,
    col4 int,
    col5 int
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
-- Explicit input/output format pair; equivalent to plain text storage.
STORED AS INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION 'hdfs://nameservice1/location1...';
在另一个位置创建另一个分区表,例如:
-- Partitioned external target table. The three partition columns are virtual:
-- their values come from the directory layout, not from the stored files.
CREATE EXTERNAL TABLE default.stack_part (
    col1 string,
    col2 string,
    col3 string,
    col4 int,
    col5 int
)
PARTITIONED BY (
    servername      string,
    applicationname string,
    load_date       string
)
STORED AS AVRO  -- u can choose any format for the final file
LOCATION 'hdfs://nameservice1/location2...';
使用以下查询从基表插入分区表:
-- Session settings: fully dynamic partitioning (no static partition required),
-- Snappy-compressed output, and parallel execution of independent stages.
SET hive.exec.dynamic.partition.mode=nonstrict;
SET hive.exec.compress.output=true;
SET hive.exec.parallel=true;
SET mapred.output.compression.codec=org.apache.hadoop.io.compress.SnappyCodec;

-- Load the partitioned table from the flat base table. Partition values are
-- derived from the source file name, which is assumed to follow the pattern
--   servername_applicationname.x.log.yyyy-MM-dd   -- TODO confirm with producer
-- The bare file name is computed once in a derived table instead of repeating
-- the reverse/split expression for every partition column, and the column
-- list is spelled out (no SELECT *) so a later schema change to default.stack
-- cannot silently shift values into the wrong target columns.
INSERT OVERWRITE TABLE default.stack_part
PARTITION (servername, applicationname, load_date)
SELECT
    src.col1,
    src.col2,
    src.col3,
    src.col4,
    src.col5,
    -- part before the first '_' is the server name
    split(src.file_name, '_')[0]                       AS servername,
    -- after the '_': applicationname.x.log.yyyy-MM-dd → token 0 is the app name
    split(split(src.file_name, '_')[1], '[.]')[0]      AS applicationname,
    -- token 3 of the dot-split tail is the date suffix
    split(split(src.file_name, '_')[1], '[.]')[3]      AS load_date
FROM (
    -- Strip the directory path from the Hive virtual column INPUT__FILE__NAME:
    -- reverse the path, take everything up to the first '/', reverse back.
    SELECT s.*,
           reverse(split(reverse(INPUT__FILE__NAME), '/')[0]) AS file_name
    FROM default.stack s
) src;
1条答案
按热度按时间排序
我假设文件名的格式为 servername_applicationname.x.log.yyyy-MM-dd(删除了第二个“applicationname”,假设是打字错误)。
在原始文件的内容上创建一个表,例如:
在另一个位置创建另一个分区表,如
使用以下查询从基表插入分区表:
我已经测试过了,它能用。