[Author]: kwu
基于Hive及Sqoop的每日PV、UV、IP定时分析
1、创建pvuvip的hive表
[mw_shl_code=sql,true]# Create the external Hive table stage.pvuvip that stores one row of
# PV / UV / IP counts per day under /dw/stage/pvuvip/.
# IF NOT EXISTS makes this setup step safe to run more than once.
hive -e "
use stage;
CREATE EXTERNAL TABLE IF NOT EXISTS pvuvip (
    day   string,
    pv    int,
    uv    int,
    ipcnt int
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ' '
LOCATION '/dw/stage/pvuvip/';"[/mw_shl_code]
2、创建MySQL关系型数据库的表
[mw_shl_code=sql,true]-- MySQL table that receives the daily PV / UV / IP rows exported by Sqoop.
-- IF NOT EXISTS keeps this setup statement re-runnable.
CREATE TABLE IF NOT EXISTS pvuvip (
    id    INT NOT NULL AUTO_INCREMENT,  -- surrogate key; not part of the exported columns
    DAY   VARCHAR(50),
    pv    INT,
    uv    INT,
    ipcnt INT,
    PRIMARY KEY (id)
)
ENGINE=MyISAM DEFAULT CHARSET=latin1;[/mw_shl_code]
3、每日定时执行的自动化脚本
[mw_shl_code=sql,true]#!/bin/sh
# Daily PV/UV/IP job: aggregate yesterday's rows of ods.tracklog into the Hive
# table stage.pvuvip, then export that table's files to MySQL with Sqoop.

# Stop at the first failing command, so that a failed Hive query never lets
# Sqoop export whatever an earlier run left in /dw/stage/pvuvip/.
set -e

# Yesterday as YYYYMMDD (relies on the GNU date --date option).
yesterday=$(date --date='1 days ago' +%Y%m%d)

# INSERT OVERWRITE replaces the table contents, so after this step the table
# holds only the aggregate for ${yesterday}.
hive -e "
use stage;
insert overwrite table pvuvip
select
    day,
    count(*) pv,
    count(distinct cookieid) uv,
    count(distinct ip) ipcnt
from ods.tracklog
where day='${yesterday}'
group by day;"

# The field delimiter must match the ' ' delimiter of the Hive table files.
# NOTE(review): the MySQL password is passed in clear text on the command
# line; Sqoop also accepts -P or --password-file -- consider switching.
# NOTE(review): the export inserts new rows, so re-running the job for the
# same day presumably duplicates that day's row in MySQL -- confirm.
sqoop export \
    --connect jdbc:mysql://localhost:3306/charts \
    --username root \
    --password 123456 \
    --table pvuvip \
    --fields-terminated-by ' ' \
    --columns "day,pv,uv,ipcnt" \
    --export-dir /dw/stage/pvuvip/[/mw_shl_code]
4、在crontab中增加定时任务
[mw_shl_code=sql,true]# Run the PV/UV/IP job every day at 06:18 (fields: minute hour day-of-month month day-of-week).
18 06 * * * /opt/bin/sqoop_opt/pvuvip.opt [/mw_shl_code]
5、刷新定时任务
[mw_shl_code=sql,true]/sbin/service crond reload [/mw_shl_code]
|
|