本帖最后由 levycui 于 2020-9-22 19:49 编辑
问题导读:
1、如何设计设备主题?
2、如何设计会员主题(会员主题+漏斗分析)?
3、如何设计商品主题(个数+销量)?
4、如何设计营销主题(用户+商品+购买行为)?
上一篇:数仓项目(九)数仓搭建 - DWS 层
一、数仓搭建 - ADS 层
1.1 设备主题
1.1.1 活跃设备数(日、周、月)
需求定义:
日活:当日活跃的设备数
周活:当周活跃的设备数
月活:当月活跃的设备数
1)建表语句
- drop table if exists ads_uv_count;
- create external table ads_uv_count(
- `dt` string COMMENT '统计日期',
- `day_count` bigint COMMENT '当日用户数量',
- `wk_count` bigint COMMENT '当周用户数量',
- `mn_count` bigint COMMENT '当月用户数量',
- `is_weekend` string COMMENT 'Y,N 是否是周末,用于得到本周最终结果',
- `is_monthend` string COMMENT 'Y,N 是否是月末,用于得到本月最终结果'
- ) COMMENT '活跃设备数'
- row format delimited fields terminated by '\t'
- location '/warehouse/gmall/ads/ads_uv_count/';
复制代码
2)导入数据
- insert into table ads_uv_count
- select
- '2020-03-10' dt,
- daycount.ct,
- wkcount.ct,
- mncount.ct,
- if(date_add(next_day('2020-03-10','MO'),-1)='2020-03-10','Y','N') ,
- if(last_day('2020-03-10')='2020-03-10','Y','N')
- from
- (
- select
- '2020-03-10' dt,
- count(*) ct
- from dwt_uv_topic
- where login_date_last='2020-03-10'
- )daycount join
- (
- select
- '2020-03-10' dt,
- count (*) ct
- from dwt_uv_topic
- where login_date_last>=date_add(next_day('2020-03-10','MO'),-7)
- and login_date_last<= date_add(next_day('2020-03-10','MO'),-1)
- ) wkcount on daycount.dt=wkcount.dt
- join
- (
- select
- '2020-03-10' dt,
- count (*) ct
- from dwt_uv_topic
- where
- date_format(login_date_last,'yyyy-MM')=date_format('2020-03-10','yyyy-MM')
- )mncount on daycount.dt=mncount.dt;
复制代码
3)查询导入结果
- select * from ads_uv_count;
复制代码
1.1.2 每日新增设备
1)建表语句
- drop table if exists ads_new_mid_count;
- create external table ads_new_mid_count
- (
- `create_date` string comment '创建时间' ,
- `new_mid_count` BIGINT comment '新增设备数量'
- ) COMMENT '每日新增设备信息数量'
- row format delimited fields terminated by '\t'
- location '/warehouse/gmall/ads/ads_new_mid_count/';
复制代码
2)导入数据
- insert into table ads_new_mid_count
- select
- login_date_first,
- count(*)
- from dwt_uv_topic
- where login_date_first='2020-03-10'
- group by login_date_first;
复制代码
3)查询导入数据
- select * from ads_new_mid_count;
复制代码
1.1.3 沉默用户数
需求定义:
沉默用户:只在安装当天启动过,且启动时间是在 7 天前
1)建表语句
- drop table if exists ads_silent_count;
- create external table ads_silent_count(
- `dt` string COMMENT '统计日期',
- `silent_count` bigint COMMENT '沉默设备数'
- )
- row format delimited fields terminated by '\t'
- location '/warehouse/gmall/ads/ads_silent_count';
复制代码
2)导入 2020-03-20 数据
- insert into table ads_silent_count
- select
- '2020-03-15',
- count(*)
- from dwt_uv_topic
- where login_date_first=login_date_last
- and login_date_last<=date_add('2020-03-15',-7);
复制代码
3)查询导入数据
- select * from ads_silent_count;
复制代码
1.1.4 本周回流用户数
需求定义:
本周回流用户:上周未活跃,本周活跃的设备,且不是本周新增设备
1)建表语句
- drop table if exists ads_back_count;
- create external table ads_back_count(
- `dt` string COMMENT '统计日期',
- `wk_dt` string COMMENT '统计日期所在周',
- `wastage_count` bigint COMMENT '回流设备数'
- )
- row format delimited fields terminated by '\t'
- location '/warehouse/gmall/ads/ads_back_count';
复制代码
2)导入数据:
- insert into table ads_back_count
- select
- '2020-03-15',
- count(*)
- from
- (
- select
- mid_id
- from dwt_uv_topic
- where login_date_last>=date_add(next_day('2020-03-15','MO'),-7)
- and login_date_last<= date_add(next_day('2020-03-15','MO'),-1)
- and login_date_first<date_add(next_day('2020-03-15','MO'),-7)
- )current_wk
- left join
- (
- select
- mid_id
- from dws_uv_detail_daycount
- where dt>=date_add(next_day('2020-03-15','MO'),-7*2)
- and dt<= date_add(next_day('2020-03-15','MO'),-7-1)
- group by mid_id
- )last_wk
- on current_wk.mid_id=last_wk.mid_id
- where last_wk.mid_id is null;
复制代码
3)查询结果
- select * from ads_back_count;
复制代码
1.1.5 流失用户数
需求定义:
流失用户:最近 7 天未活跃的设备
1)建表语句
- drop table if exists ads_wastage_count;
- create external table ads_wastage_count(
- `dt` string COMMENT '统计日期',
- `wastage_count` bigint COMMENT '流失设备数'
- )
- row format delimited fields terminated by '\t'
- location '/warehouse/gmall/ads/ads_wastage_count';
复制代码
2)导入 2020-03-20 数据
- insert into table ads_wastage_count
- select
- '2020-03-20',
- count(*)
- from
- (
- select
- mid_id
- from dwt_uv_topic
- where login_date_last<=date_add('2020-03-20',-7)
- group by mid_id
- )t1;
-
复制代码
3)查询结果
- select * from ads_wastage_count;
复制代码
1.1.6 留存率
1)建表语句
- drop table if exists ads_user_retention_day_rate;
- create external table ads_user_retention_day_rate
- (
- `stat_date` string comment '统计日期',
- `create_date` string comment '设备新增日期',
- `retention_day` int comment '截止当前日期留存天数',
- `retention_count` bigint comment '留存数量',
- `new_mid_count` bigint comment '设备新增数量',
- `retention_ratio` decimal(10,2) comment '留存率'
- ) COMMENT '每日用户留存情况'
- row format delimited fields terminated by '\t'
- location '/warehouse/gmall/ads/ads_user_retention_day_rate/';
复制代码
2)导入数据
- insert into table ads_user_retention_day_rate
- select
- '2020-03-10',--统计日期
- date_add('2020-03-10',-1),--新增日期
- 1,--留存天数
- sum(if(login_date_first=date_add('2020-03-10',-1) and
- login_date_last='2020-03-10',1,0)),--2020-03-09 的 1 日留存数
- sum(if(login_date_first=date_add('2020-03-10',-1),1,0)),--2020-03-09 新增
- sum(if(login_date_first=date_add('2020-03-10',-1) and
- login_date_last='2020-03-10',1,0))/sum(if(login_date_first=date_add('2020-03-10',-
- 1),1,0))*100
- from dwt_uv_topic
- union all
- select
- '2020-03-10',--统计日期
- date_add('2020-03-10',-2),--新增日期
- 2,--留存天数
- sum(if(login_date_first=date_add('2020-03-10',-2) and
- login_date_last='2020-03-10',1,0)),--2020-03-08 的 2 日留存数
- sum(if(login_date_first=date_add('2020-03-10',-2),1,0)),--2020-03-08 新增
- sum(if(login_date_first=date_add('2020-03-10',-2) and
- login_date_last='2020-03-10',1,0))/sum(if(login_date_first=date_add('2020-03-10',-
- 2),1,0))*100
- from dwt_uv_topic
- union all
- select
- '2020-03-10',--统计日期
- date_add('2020-03-10',-3),--新增日期
- 3,--留存天数
- sum(if(login_date_first=date_add('2020-03-10',-3) and
- login_date_last='2020-03-10',1,0)),--2020-03-07 的 3 日留存数
- sum(if(login_date_first=date_add('2020-03-10',-3),1,0)),--2020-03-07 新增
- sum(if(login_date_first=date_add('2020-03-10',-3) and
- login_date_last='2020-03-10',1,0))/sum(if(login_date_first=date_add('2020-03-10',-
- 3),1,0))*100
- from dwt_uv_topic;
复制代码
3)查询导入数据
- select * from ads_user_retention_day_rate;
复制代码
1.1.7 最近连续三周活跃用户数
1)建表语句
- drop table if exists ads_continuity_wk_count;
- create external table ads_continuity_wk_count(
- `dt` string COMMENT '统计日期,一般用结束周周日日期,如果每天计算一次,可用当天日
- 期',
- `wk_dt` string COMMENT '持续时间',
- `continuity_count` bigint COMMENT '活跃次数'
- )
- row format delimited fields terminated by '\t'
- location '/warehouse/gmall/ads/ads_continuity_wk_count';
复制代码
2)导入 2020-03-20 所在周的数据
- insert into table ads_continuity_wk_count
- select
- '2020-03-15',
- concat(date_add(next_day('2020-03-15','MO'),-7*3),'_',date_add(next_day('
- 2020-03-15','MO'),-1)),
- count(*)
- from
- (
- select
- mid_id
- from
- (
- select
- mid_id
- from dws_uv_detail_daycount
- where dt>=date_add(next_day('2020-03-10','monday'),-7)
- and dt<=date_add(next_day('2020-03-10','monday'),-1)
- group by mid_id
- union all
- select
- mid_id
- from dws_uv_detail_daycount
- where dt>=date_add(next_day('2020-03-10','monday'),-7*2)
- and dt<=date_add(next_day('2020-03-10','monday'),-7-1)
- group by mid_id
- union all
- select
- mid_id
- from dws_uv_detail_daycount
- where dt>=date_add(next_day('2020-03-10','monday'),-7*3)
- and dt<=date_add(next_day('2020-03-10','monday'),-7*2-1)
- group by mid_id
- )t1
- group by mid_id
- having count(*)=3
- )t2
复制代码
3)查询
- select * from ads_continuity_wk_count;
复制代码
1.1.8 最近七天内连续三天活跃用户数
1)建表语句
- drop table if exists ads_continuity_uv_count;
- create external table ads_continuity_uv_count(
- `dt` string COMMENT '统计日期',
- `wk_dt` string COMMENT '最近 7 天日期',
- `continuity_count` bigint
- ) COMMENT '连续活跃设备数'
- row format delimited fields terminated by '\t'
- location '/warehouse/gmall/ads/ads_continuity_uv_count';
复制代码
2)写出导入数据的 SQL 语句
- insert into table ads_continuity_uv_count
- select
- '2020-03-12',
- concat(date_add('2020-03-12',-6),'_','2020-03-12'),
- count(*)
- from
- (
- select mid_id
- from
- (
- select mid_id
- from
- (
- select
- mid_id,
- date_sub(dt,rank) date_dif
- from
- (
- select
- mid_id,
- dt,
- rank() over(partition by mid_id order by dt) rank
- from dws_uv_detail_daycount
- where dt>=date_add('2020-03-12',-6) and
- dt<='2020-03-12'
- )t1
- )t2
- group by mid_id,date_dif
- having count(*)>=3
- )t3
- group by mid_id
- )t4;
复制代码
3)查询
- select * from ads_continuity_uv_count;
复制代码
7.2 会员主题
7.2.1 会员主题信息
1)建表
- drop table if exists ads_user_topic;
- create external table ads_user_topic(
- `dt` string COMMENT '统计日期',
- `day_users` string COMMENT '活跃会员数',
- `day_new_users` string COMMENT '新增会员数',
- `day_new_payment_users` string COMMENT '新增消费会员数',
- `payment_users` string COMMENT '总付费会员数',
- `users` string COMMENT '总会员数',
- `day_users2users` decimal(10,2) COMMENT '会员活跃率',
- `payment_users2users` decimal(10,2) COMMENT '会员付费率',
- `day_new_users2users` decimal(10,2) COMMENT '会员新鲜度'
- ) COMMENT '会员主题信息表'
- row format delimited fields terminated by '\t'
- location '/warehouse/gmall/ads/ads_user_topic';
复制代码
2)导入数据
- insert into table ads_user_topic
- select
- '2020-03-10',
- sum(if(login_date_last='2020-03-10',1,0)),
- sum(if(login_date_first='2020-03-10',1,0)),
- sum(if(payment_date_first='2020-03-10',1,0)),
- sum(if(payment_count>0,1,0)),
- count(*),
- sum(if(login_date_last='2020-03-10',1,0))/count(*),
- sum(if(payment_count>0,1,0))/count(*),
- sum(if(login_date_first='2020-03-10',1,0))/sum(if(login_date_last='2020-03-10',1,0))
- from dwt_user_topic
复制代码
3)查询数据
- hive (gmall)> select * from ads_user_topic;
复制代码
4)vim ads_user_topic.sh
添加如下内容:
- #!/bin/bash
- APP=gmall
- hive=/opt/modules/hive/bin/hive
- # 如果是输入的日期按照取输入日期;如果没输入日期取当前时间的前一天
- if [ -n "$1" ] ;then
- do_date=$1
- else
- do_date=`date -d "-1 day" +%F`
- fi
- sql="
- with
- tmp_day_users as
- (
- select
- '$do_date' dt,
- count(*) day_users
- from
- ${APP}.dwt_user_topic
- where
- login_date_last='$do_date'
- ),
- tmp_day_new_users as
- (
- select
- '$do_date' dt,
- count(*) day_new_users
- from
- ${APP}.dwt_user_topic
- where
- login_date_last='$do_date' and login_date_first='$do_date'
- ),
- tmp_day_new_payment_users as
- (
- select
- '$do_date' dt,
- count(*) day_new_payment_users
- from
- ${APP}.dwt_user_topic
- where
- payment_date_first='$do_date'
- ),
- tmp_payment_users as
- (
- select
- '$do_date' dt,
- count(*) payment_users
- from
- ${APP}.dwt_user_topic
- where
- payment_date_first is not null
- ),
- tmp_users as
- (
- select
- '$do_date' dt,
- count(*) users
- from
- ${APP}.dwt_user_topic
- tmp_users
- )
- insert into table ${APP}.ads_user_topic
- select
- '$do_date' dt,
- day_users,
- day_new_users,
- day_new_payment_users,
- payment_users,
- users,
- day_users/users,
- payment_users/users,
- day_new_users/users
- from
- tmp_day_users
- join
- tmp_day_new_users
- on
- tmp_day_users.dt=tmp_day_new_users.dt
- join
- tmp_day_new_payment_users
- on
- tmp_day_users.dt=tmp_day_new_payment_users.dt
- join
- tmp_payment_users
- on
- tmp_day_users.dt=tmp_payment_users.dt
- join
- tmp_users
- on
- tmp_day_users.dt=tmp_users.dt;
- "
- $hive -e "$sql"
复制代码
5)增加脚本执行权限
- chmod 770 ads_user_topic.sh
复制代码
6)执行脚本导入数据
- ads_user_topic.sh 2020-03-11
复制代码
7)查看导入数据
- select * from ads_user_topic;
复制代码
7.2.2 漏斗分析
统计“浏览->购物车->下单->支付”的转化率
思路:统计各个行为的人数,然后计算比值
1)建表语句
- drop table if exists ads_user_action_convert_day;
- create external table ads_user_action_convert_day(
- `dt` string COMMENT '统计日期',
- `total_visitor_m_count` bigint COMMENT '总访问人数',
- `cart_u_count` bigint COMMENT '加入购物车的人数',
- `visitor2cart_convert_ratio` decimal(10,2) COMMENT '访问到加入购物车转化率',
- `order_u_count` bigint COMMENT '下单人数',
- `cart2order_convert_ratio` decimal(10,2) COMMENT '加入购物车到下单转化率',
- `payment_u_count` bigint COMMENT '支付人数',
- `order2payment_convert_ratio` decimal(10,2) COMMENT '下单到支付的转化率'
- ) COMMENT '用户行为漏斗分析'
- row format delimited fields terminated by '\t'
- location '/warehouse/gmall/ads/ads_user_action_convert_day/';
复制代码
2)数据装载
- insert into table ads_user_action_convert_day
- select
- '2020-03-10',
- uv.day_count,
- ua.cart_count,
- cast(ua.cart_count/uv.day_count as decimal(10,2)) visitor2cart_convert_ratio,
- ua.order_count,
- cast(ua.order_count/ua.cart_count as decimal(10,2)) visitor2order_convert_ratio,
- ua.payment_count,
- cast(ua.payment_count/ua.order_count as decimal(10,2)) order2payment_convert_ratio
- from
- (
- select
- dt,
- sum(if(cart_count>0,1,0)) cart_count,
- sum(if(order_count>0,1,0)) order_count,
- sum(if(payment_count>0,1,0)) payment_count
- from dws_user_action_daycount
- where dt='2020-03-10'
- group by dt
- )ua join ads_uv_count uv on uv.dt=ua.dt;
复制代码
3)查询加载数据
- select * from ads_user_action_convert_day;
复制代码
7.3 商品主题
7.3.1 商品个数信息
1)建表语句
- drop table if exists ads_product_info;
- create external table ads_product_info(
- `dt` string COMMENT '统计日期',
- `sku_num` string COMMENT 'sku 个数',
- `spu_num` string COMMENT 'spu 个数'
- ) COMMENT '商品个数信息'
- row format delimited fields terminated by '\t'
- location '/warehouse/gmall/ads/ads_product_info';
复制代码
2)导入数据
- insert into table ads_product_info
- select
- '2020-03-10' dt,
- sku_num,
- spu_num
- from
- (
- select
- '2020-03-10' dt,
- count(*) sku_num
- from
- dwt_sku_topic
- ) tmp_sku_num
- join
- (
- select
- '2020-03-10' dt,
- count(*) spu_num
- from
- (
- select
- spu_id
- from
- dwt_sku_topic
- group by
- spu_id
- ) tmp_spu_id
- ) tmp_spu_num
- on
- tmp_sku_num.dt=tmp_spu_num.dt;
复制代码
3)查询结果数据
- select * from ads_product_info;
复制代码
7.3.2 商品销量排名
1)建表语句
- drop table if exists ads_product_sale_topN;
- create external table ads_product_sale_topN(
- `dt` string COMMENT '统计日期',
- `sku_id` string COMMENT '商品 ID',
- `payment_amount` bigint COMMENT '销量'
- ) COMMENT '商品个数信息'
- row format delimited fields terminated by '\t'
- location '/warehouse/gmall/ads/ads_product_sale_topN';
复制代码
2)导入数据
- insert into table ads_product_sale_topN
- select
- '2020-03-10' dt,
- sku_id,
- payment_amount
- from
- dws_sku_action_daycount
- where
- dt='2020-03-10'
- order by payment_amount desc
- limit 10;
复制代码
3)查询结果数据
- select * from ads_product_sale_topN;
复制代码
7.3.3 商品收藏排名
1)建表语句
- drop table if exists ads_product_favor_topN;
- create external table ads_product_favor_topN(
- `dt` string COMMENT '统计日期',
- `sku_id` string COMMENT '商品 ID',
- `favor_count` bigint COMMENT '收藏量'
- ) COMMENT '商品收藏 TopN'
- row format delimited fields terminated by '\t'
- location '/warehouse/gmall/ads/ads_product_favor_topN';
复制代码
2)导入数据
- insert into table ads_product_favor_topN
- select
- '2020-03-10' dt,
- sku_id,
- favor_count
- from
- dws_sku_action_daycount
- where
- dt='2020-03-10'
- order by favor_count desc
- limit 10;
复制代码
3)查询数据
- select * from ads_product_favor_topN;
复制代码
7.3.4 商品加入购物车排名
1)建表语句
- drop table if exists ads_product_cart_topN;
- create external table ads_product_cart_topN(
- `dt` string COMMENT '统计日期',
- `sku_id` string COMMENT '商品 ID',
- `cart_num` bigint COMMENT '加入购物车数量'
- ) COMMENT '商品加入购物车 TopN'
- row format delimited fields terminated by '\t'
- location '/warehouse/gmall/ads/ads_product_cart_topN';
复制代码
2)导入数据
- insert into table ads_product_cart_topN
- select
- '2020-03-10' dt,
- sku_id,
- cart_num
- from
- dws_sku_action_daycount
- where
- dt='2020-03-10'
- order by cart_num desc
- limit 10;
复制代码
3)查询数据
- select * from ads_product_cart_topN;
复制代码
7.3.5 商品退款率排名(最近 30 天)
1)建表语句
- drop table if exists ads_product_refund_topN;
- create external table ads_product_refund_topN(
- `dt` string COMMENT '统计日期',
- `sku_id` string COMMENT '商品 ID',
- `refund_ratio` decimal(10,2) COMMENT '退款率'
- ) COMMENT '商品退款率 TopN'
- row format delimited fields terminated by '\t'
- location '/warehouse/gmall/ads/ads_product_refund_topN';
复制代码
2)导入数据
- insert into table ads_product_refund_topN
- select
- '2020-03-10',
- sku_id,
- refund_last_30d_count/payment_last_30d_count*100 refund_ratio
- from dwt_sku_topic
- order by refund_ratio desc
- limit 10;
复制代码
3)查询数据
- select * from ads_product_refund_topN;
复制代码
7.3.6 商品差评率
1)建表语句
- drop table if exists ads_appraise_bad_topN;
- create external table ads_appraise_bad_topN(
- `dt` string COMMENT '统计日期',
- `sku_id` string COMMENT '商品 ID',
- `appraise_bad_ratio` decimal(10,2) COMMENT '差评率'
- ) COMMENT '商品差评率 TopN'
- row format delimited fields terminated by '\t'
- location '/warehouse/gmall/ads/ads_appraise_bad_topN';
复制代码
2)导入数据
- insert into table ads_appraise_bad_topN
- select
- '2020-03-10' dt,
- sku_id,
- appraise_bad_count/(appraise_good_count+appraise_mid_count+appraise_bad_coun
- t+appraise_default_count) appraise_bad_ratio
- from
- dws_sku_action_daycount
- where
- dt='2020-03-10'
- order by appraise_bad_ratio desc
- limit 10;
复制代码
3)查询数据
- select * from ads_appraise_bad_topN;
复制代码
7.4 营销主题(用户+商品+购买行为)
7.4.1 下单数目统计
需求分析:统计每日下单数,下单金额及下单用户数
1)建表语句
- drop table if exists ads_order_daycount;
- create external table ads_order_daycount(
- dt string comment '统计日期',
- order_count bigint comment '单日下单笔数',
- order_amount decimal(10,2) comment '单日下单金额',
- order_users bigint comment '单日下单用户数'
- ) comment '每日订单总计表'
- row format delimited fields terminated by '\t'
- location '/warehouse/gmall/ads/ads_order_daycount';
复制代码
2)导入数据
- insert into table ads_order_daycount
- select
- '2020-03-10',
- sum(order_count),
- sum(order_amount),
- sum(if(order_count>0,1,0))
- from dws_user_action_daycount
- where dt='2020-03-10';
复制代码
3)查询数据
- select * from ads_order_daycount;
复制代码
7.4.2 支付信息统计
每日支付金额、支付人数、支付商品数、支付笔数以及下单到支付的平均时长(取自 DWD)
1)建表
- drop table if exists ads_payment_daycount;
- create external table ads_payment_daycount(
- dt string comment '统计日期',
- payment_count bigint comment '单日支付笔数',
- payment_amount decimal(10,2) comment '单日支付金额',
- payment_user_count bigint comment '单日支付人数',
- payment_sku_count bigint comment '单日支付商品数',
- payment_avg_time double comment '下单到支付的平均时长,取分钟数'
- ) comment '每日订单总计表'
- row format delimited fields terminated by '\t'
- location '/warehouse/gmall/ads/ads_payment_daycount';
复制代码
2)导入数据
- insert into table ads_payment_daycount
- select
- tmp_payment.dt,
- tmp_payment.payment_count,
- tmp_payment.payment_amount,
- tmp_payment.payment_user_count,
- tmp_skucount.payment_sku_count,
- tmp_time.payment_avg_time
- from
- (
- select
- '2020-03-15' dt,
- sum(payment_count) payment_count,
- sum(payment_amount) payment_amount,
- sum(if(payment_count>0,1,0)) payment_user_count
- from dws_user_action_daycount
- where dt='2020-03-15'
- )tmp_payment
- join
- (
- select
- '2020-03-15' dt,
- sum(if(payment_count>0,1,0)) payment_sku_count
- from dws_sku_action_daycount
- where dt='2020-03-15'
- )tmp_skucount on tmp_payment.dt=tmp_skucount.dt
- join
- (
- select
- '2020-03-15' dt,
- sum(unix_timestamp(payment_time)-unix_timestamp(create_time))/count(*)/60
- payment_avg_time
- from dwd_fact_order_info
- where dt='2020-03-15'
- and payment_time is not null
- )tmp_time on tmp_payment.dt=tmp_time.dt
复制代码
3)查询数据
- select * from ads_payment_daycount;
复制代码
7.4.3 复购率
1)建表语句
- drop table ads_sale_tm_category1_stat_mn;
- create external table ads_sale_tm_category1_stat_mn
- (
- tm_id string comment '品牌 id',
- category1_id string comment '1 级品类 id ',
- category1_name string comment '1 级品类名称 ',
- buycount bigint comment '购买人数',
- buy_twice_last bigint comment '两次以上购买人数',
- buy_twice_last_ratio decimal(10,2) comment '单次复购率',
- buy_3times_last bigint comment '三次以上购买人数',
- buy_3times_last_ratio decimal(10,2) comment '多次复购率',
- stat_mn string comment '统计月份',
- stat_date string comment '统计日期'
- ) COMMENT '复购率统计'
- row format delimited fields terminated by '\t'
- location '/warehouse/gmall/ads/ads_sale_tm_category1_stat_mn/';
复制代码
2)数据导入
- insert into table ads_sale_tm_category1_stat_mn
- select
- mn.sku_tm_id,
- mn.sku_category1_id,
- mn.sku_category1_name,
- sum(if(mn.order_count>=1,1,0)) buycount,
- sum(if(mn.order_count>=2,1,0)) buyTwiceLast,
- sum(if(mn.order_count>=2,1,0))/sum( if(mn.order_count>=1,1,0))
- buyTwiceLastRatio,
- sum(if(mn.order_count>=3,1,0)) buy3timeLast ,
- sum(if(mn.order_count>=3,1,0))/sum( if(mn.order_count>=1,1,0))
- buy3timeLastRatio ,
- date_format('2020-03-10' ,'yyyy-MM') stat_mn,
- '2020-03-10' stat_date
- from
- (
- select
- user_id,
- sd.sku_tm_id,
- sd.sku_category1_id,
- sd.sku_category1_name,
- sum(order_count) order_count
- from dws_sale_detail_daycount sd
- where date_format(dt,'yyyy-MM')=date_format('2020-03-10' ,'yyyy-MM')
- group by user_id, sd.sku_tm_id, sd.sku_category1_id, sd.sku_category1_name
- ) mn
- group by mn.sku_tm_id, mn.sku_category1_id, mn.sku_category1_name;
复制代码
7.5 ADS 层导入脚本
1)vim dwt_to_ads.sh
在脚本中填写如下内容
- #!/bin/bash
- hive=/opt/modules/hive/bin/hive
- # 如果是输入的日期按照取输入日期;如果没输入日期取当前时间的前一天
- if [ -n "$1" ] ;then
- do_date=$1
- else
- do_date=`date -d "-1 day" +%F`
- fi
- sql="use gmall;
- insert into table ads_uv_count
- select
- '$do_date',
- sum(if(login_date_last='$do_date',1,0)),
- sum(if(login_date_last>=date_add(next_day('$do_date','monday'),-7) and
- login_date_last<=date_add(next_day('$do_date','monday'),-1) ,1,0)),
- sum(if(date_format(login_date_last,'yyyy-MM')=date_format('$do_date','yyyy-M
- M'),1,0)),
- if('$do_date'=date_add(next_day('$do_date','monday'),-1),'Y','N'),
- if('$do_date'=last_day('$do_date'),'Y','N')
- from dwt_uv_topic;
-
-
- insert into table ads_new_mid_count
- select
- '$do_date',
- count(*)
- from dwt_uv_topic
- where login_date_first='$do_date';
-
-
- insert into table ads_silent_count
- select
- '$do_date',
- count(*)
- from dwt_uv_topic
- where login_date_first=login_date_last
- and login_date_last<=date_add('$do_date',-7);
-
-
- insert into table ads_back_count
- select
- '$do_date',
- concat(date_add(next_day('2020-03-10','MO'),-7),'_',date_add(next_day('2020-
- 03-10','MO'),-1)),
- count(*)
- from
- (
- select
- mid_id
- from dwt_uv_topic
- where login_date_last>=date_add(next_day('$do_date','MO'),-7)
- and login_date_last<= date_add(next_day('$do_date','MO'),-1)
- and login_date_first<date_add(next_day('$do_date','MO'),-7)
- )current_wk
- left join
- (
- select
- mid_id
- from dws_uv_detail_daycount
- where dt>=date_add(next_day('$do_date','MO'),-7*2)
- and dt<= date_add(next_day('$do_date','MO'),-7-1)
- group by mid_id
- )last_wk
- on current_wk.mid_id=last_wk.mid_id
- where last_wk.mid_id is null;
-
-
- insert into table ads_wastage_count
- select
- '$do_date',
- count(*)
- from dwt_uv_topic
- where login_date_last<=date_add('$do_date',-7);
-
-
- insert into table ads_user_retention_day_rate
- select
- '$do_date',
- date_add('$do_date',-3),
- 3,
- sum(if(login_date_first=date_add('$do_date',-3) and
- login_date_last='$do_date',1,0)),
- sum(if(login_date_first=date_add('$do_date',-3),1,0)),
- sum(if(login_date_first=date_add('$do_date',-3) and
- login_date_last='$do_date',1,0))/sum(if(login_date_first=date_add('$do_date',
- -3),1,0))*100
- from dwt_uv_topic
- union all
- select
- '$do_date',
- date_add('$do_date',-2),
- 2,
- sum(if(login_date_first=date_add('$do_date',-2) and
- login_date_last='$do_date',1,0)),
- sum(if(login_date_first=date_add('$do_date',-2),1,0)),
- sum(if(login_date_first=date_add('$do_date',-2) and
- login_date_last='$do_date',1,0))/sum(if(login_date_first=date_add('$do_date',
- -2),1,0))*100
- from dwt_uv_topic
- union all
- select
- '$do_date',
- date_add('$do_date',-1),
- 1,
- sum(if(login_date_first=date_add('$do_date',-1) and
- login_date_last='$do_date',1,0)),
- sum(if(login_date_first=date_add('$do_date',-1),1,0)),
- sum(if(login_date_first=date_add('$do_date',-1) and
- login_date_last='$do_date',1,0))/sum(if(login_date_first=date_add('$do_date',
- -1),1,0))*100
- from dwt_uv_topic;
-
-
- insert into table ads_continuity_wk_count
- select
- '$do_date',
- concat(date_add(next_day('$do_date','MO'),-7*3),'_',date_add(next_day('$do_d
- ate','MO'),-1)),
- count(*)
- from
- (
- select
- mid_id
- from
- (
- select
- mid_id
- from dws_uv_detail_daycount
- where dt>=date_add(next_day('$do_date','monday'),-7)
- and dt<=date_add(next_day('$do_date','monday'),-1)
- group by mid_id
- union all
- select
- mid_id
- from dws_uv_detail_daycount
- where dt>=date_add(next_day('$do_date','monday'),-7*2)
- and dt<=date_add(next_day('$do_date','monday'),-7-1)
- group by mid_id
- union all
- select
- mid_id
- from dws_uv_detail_daycount
- where dt>=date_add(next_day('$do_date','monday'),-7*3)
- and dt<=date_add(next_day('$do_date','monday'),-7*2-1)
- group by mid_id
- )t1
- group by mid_id
- having count(*)=3
- )t2;
-
-
- insert into table ads_continuity_uv_count
- select
- '$do_date',
- concat(date_add('$do_date',-6),'_','$do_date'),
- count(*)
- from
- (
- select mid_id
- from
- (
- select mid_id
- from
- (
- select
- mid_id,
- date_sub(dt,rank) date_dif
- from
- (
- select
- mid_id,
- dt,
- rank() over(partition by mid_id order by dt) rank
- from dws_uv_detail_daycount
- where dt>=date_add('$do_date',-6) and dt<='$do_date'
- )t1
- )t2
- group by mid_id,date_dif
- having count(*)>=3
- )t3
- group by mid_id
- )t4;
-
-
- insert into table ads_user_topic
- select
- '$do_date',
- sum(if(login_date_last='$do_date',1,0)),
- sum(if(login_date_first='$do_date',1,0)),
- sum(if(payment_date_first='$do_date',1,0)),
- sum(if(payment_count>0,1,0)),
- count(*),
- sum(if(login_date_last='$do_date',1,0))/count(*),
- sum(if(payment_count>0,1,0))/count(*),
- sum(if(login_date_first='$do_date',1,0))/sum(if(login_date_last='$do_date',1,
- 0))
- from dwt_user_topic;
-
-
- insert into table ads_user_action_convert_day
- select
- '$do_date',
- uv.day_count,
- ua.cart_count,
- ua.cart_count/uv.day_count*100 visitor2cart_convert_ratio,
- ua.order_count,
- ua.order_count/ua.cart_count*100 visitor2order_convert_ratio,
- ua.payment_count,
- ua.payment_count/ua.order_count*100 order2payment_convert_ratio
- from
- (
- select
- '$do_date' dt,
- sum(if(cart_count>0,1,0)) cart_count,
- sum(if(order_count>0,1,0)) order_count,
- sum(if(payment_count>0,1,0)) payment_count
- from dws_user_action_daycount
- where dt='$do_date'
- )ua join ads_uv_count uv on uv.dt=ua.dt;
-
-
- insert into table ads_product_info
- select
- '$do_date' dt,
- sku_num,
- spu_num
- from
- (
- select
- '$do_date' dt,
- count(*) sku_num
- from
- dwt_sku_topic
- ) tmp_sku_num
- join
- (
- select
- '$do_date' dt,
- count(*) spu_num
- from
- (
- select
- spu_id
- from
- dwt_sku_topic
- group by
- spu_id
- ) tmp_spu_id
- ) tmp_spu_num
- on tmp_sku_num.dt=tmp_spu_num.dt;
-
-
- insert into table ads_product_sale_topN
- select
- '$do_date',
- sku_id,
- payment_amount
- from dws_sku_action_daycount
- where dt='$do_date'
- order by payment_amount desc
- limit 10;
-
-
- insert into table ads_product_favor_topN
- select
- '$do_date',
- sku_id,
- favor_count
- from dws_sku_action_daycount
- where dt='$do_date'
- order by favor_count
- limit 10;
-
-
- insert into table ads_product_cart_topN
- select
- '$do_date' dt,
- sku_id,
- cart_num
- from dws_sku_action_daycount
- where dt='$do_date'
- order by cart_num
- limit 10;
-
-
- insert into table ads_product_refund_topN
- select
- '$do_date',
- sku_id,
- refund_last_30d_count/payment_last_30d_count*100 refund_ratio
- from dwt_sku_topic
- order by refund_ratio desc
- limit 10;
-
-
- insert into table ads_appraise_bad_topN
- select
- '$do_date' dt,
- sku_id,
- appraise_bad_count/(appraise_bad_count+appraise_good_count+appraise_mid_coun
- t+appraise_default_count)*100 appraise_bad_ratio
- from dws_sku_action_daycount
- where dt='$do_date'
- order by appraise_bad_ratio desc
- limit 10;
-
-
- insert into table ads_order_daycount
- select
- '$do_date',
- sum(order_count),
- sum(order_amount),
- sum(if(order_count>0,1,0))
- from dws_user_action_daycount
- where dt='$do_date';
-
-
- insert into table ads_payment_daycount
- select
- tmp_payment.dt,
- tmp_payment.payment_count,
- tmp_payment.payment_amount,
- tmp_payment.payment_user_count,
- tmp_skucount.payment_sku_count,
- tmp_time.payment_avg_time
- from
- (
- select
- '$do_date' dt,
- sum(payment_count) payment_count,
- sum(payment_amount) payment_amount,
- sum(if(payment_count>0,1,0)) payment_user_count
- from dws_user_action_daycount
- where dt='$do_date'
- )tmp_payment
- join
- (
- select
- '$do_date' dt,
- sum(if(payment_count>0,1,0)) payment_sku_count
- from dws_sku_action_daycount
- where dt='$do_date'
- )tmp_skucount on tmp_payment.dt=tmp_skucount.dt
- join
- (
- select
- '$do_date' dt,
- sum(unix_timestamp(payment_time)-unix_timestamp(create_time))/count(*)/60
- payment_avg_time
- from dwd_fact_order_info
- where dt='$do_date'
- and payment_time is not null
- )tmp_time on tmp_payment.dt=tmp_time.dt;
-
-
- insert into table ads_sale_tm_category1_stat_mn
- select
- mn.sku_tm_id,
- mn.sku_category1_id,
- mn.sku_category1_name,
- sum(if(mn.order_count>=1,1,0)) buycount,
- sum(if(mn.order_count>=2,1,0)) buyTwiceLast,
- sum(if(mn.order_count>=2,1,0))/sum( if(mn.order_count>=1,1,0))
- buyTwiceLastRatio,
- sum(if(mn.order_count>=3,1,0)) buy3timeLast ,
- sum(if(mn.order_count>=3,1,0))/sum( if(mn.order_count>=1,1,0))
- buy3timeLastRatio ,
- date_format('$do_date' ,'yyyy-MM') stat_mn,
- '$do_date' stat_date
- from
- (
- select
- user_id,
- sd.sku_tm_id,
- sd.sku_category1_id,
- sd.sku_category1_name,
- sum(order_count) order_count
- from dws_sale_detail_daycount sd
- where date_format(dt,'yyyy-MM')=date_format('$do_date' ,'yyyy-MM')
- group by user_id, sd.sku_tm_id, sd.sku_category1_id, sd.sku_category1_name
- ) mn
- group by mn.sku_tm_id, mn.sku_category1_id, mn.sku_category1_name;
- "
- $hive -e "$sql"
复制代码
2)增加脚本执行权限
复制代码 3)执行脚本导入数据
复制代码 4)查看导入数据
作者:together
来源:https://together.blog.csdn.net/article/details/105968377
最新经典文章,欢迎关注公众号
|