目录
一、同期群分析的定义
二、SQL 步骤
「同期群分析」(Cohort Analysis)是一种通过“纵横”结合对用户分群的细分类型分析的方法:
**「同期群」**指的是同一时期的群体,可以是同一天注册的用户、同一天第一次发生付费行为的用户等。
**「周期的指标变化」**是指用户在一定周期内的留存率、付费率等指标。
同期群分析包含三个核心的元素:
同期群分析给到更加细致的衡量指标,可以实时监控真实的用户行为、衡量用户价值,并为营销方案的优化和改进提供支撑,避免“被平均”的虚荣数据。
下面我使用 PostgreSQL
拆分步骤来实现基于首单日期的用户留存率同期群报表,「每一步骤都是在前一步骤的基础上进行再加工」,这在代码中的子查询中也得到体现,理清了思路就会发现其实很简单。
重点有以下几点:
-- 0. 查看数据
SELECT * FROM "日志" LIMIT 10;
-- 1. 根据 uid 、年月聚合用户人数
SELECT
"日志".uid,
to_char( to_date( "日志"."日期", 'YYYY-MM' ), 'YYYY-MM' ) AS 年月,
min(to_char( to_date( "日志"."日期", 'YYYY-MM' ), 'YYYY-MM' )) OVER(PARTITION BY "日志".uid) AS 首次付费年月
FROM
"日志"
GROUP BY
"日志".uid,
to_char( to_date( "日志"."日期", 'YYYY-MM' ), 'YYYY-MM' )
ORDER BY "日志".uid;
-- 2. 计算年月的差额(天数)
SELECT *,to_date(t.年月,'YYYY-MM') - to_date(t.首次付费年月,'YYYY-MM') AS 天数差额
FROM (SELECT
"日志".uid,
to_char( to_date( "日志"."日期", 'YYYY-MM' ), 'YYYY-MM' ) AS 年月,
min(to_char( to_date( "日志"."日期", 'YYYY-MM' ), 'YYYY-MM' )) OVER(PARTITION BY "日志".uid) AS 首次付费年月
FROM
"日志"
GROUP BY
"日志".uid,
to_char( to_date( "日志"."日期", 'YYYY-MM' ), 'YYYY-MM' )
ORDER BY "日志".uid) AS t;
-- 3. 计算年月的差额(月数)
SELECT t.*,
(case when t."天数差额" <= 30 then '首月'
when t."天数差额" <= 60 then '+1月'
when t."天数差额" <= 90 then '+2月'
when t."天数差额" <= 120 then '+3月'
when t."天数差额" <= 150 then '+4月'
else NULL
END) AS 月差额
FROM (SELECT *,to_date(t.年月,'YYYY-MM') - to_date(t.首次付费年月,'YYYY-MM') AS 天数差额
FROM (SELECT
"日志".uid,
to_char( to_date( "日志"."日期", 'YYYY-MM' ), 'YYYY-MM' ) AS 年月,
min(to_char( to_date( "日志"."日期", 'YYYY-MM' ), 'YYYY-MM' )) OVER(PARTITION BY "日志".uid) AS 首次付费年月
FROM
"日志"
GROUP BY
"日志".uid,
to_char( to_date( "日志"."日期", 'YYYY-MM' ), 'YYYY-MM' )
ORDER BY "日志".uid) AS t) AS t;
-- 4. 透视(根据 uid 、首次付费年月去透视年月差额的用户人数)
SELECT t.首次付费年月,
count(distinct case when t.年月差额 = 0 then t.uid else NULL end) AS 首月,
count(distinct case when t.年月差额 = 1 then t.uid else NULL end) AS "+1月",
count(distinct case when t.年月差额 = 2 then t.uid else NULL end) AS "+2月",
count(distinct case when t.年月差额 = 3 then t.uid else NULL end) AS "+3月",
count(distinct case when t.年月差额 = 4 then t.uid else NULL end) AS "+4月"
FROM (SELECT * FROM (SELECT *,round((to_date(t.年月,'YYYY-MM') - to_date(t.首次付费年月,'YYYY-MM')) / 30,0) AS 年月差额
FROM (SELECT
"日志".uid:: text,
to_char( to_date( "日志"."日期", 'YYYY-MM' ), 'YYYY-MM' ) AS 年月,
min(to_char( to_date( "日志"."日期", 'YYYY-MM' ), 'YYYY-MM' )) OVER(PARTITION BY "日志".uid) AS 首次付费年月
FROM
"日志"
GROUP BY
"日志".uid,
to_char( to_date( "日志"."日期", 'YYYY-MM' ), 'YYYY-MM' )
ORDER BY "日志".uid) AS t) AS t) AS t
GROUP BY t.首次付费年月;
-- 5. 计算留存率
SELECT t.首次付费年月,t.首月,
round((t."+1月"::numeric / t.首月::numeric) * 100,2)::text || '%' AS "1月后",
round((t."+2月"::numeric / t.首月::numeric) * 100,2)::text || '%' AS "2月后",
round((t."+3月"::numeric / t.首月::numeric) * 100,2)::text || '%' AS "3月后",
round((t."+4月"::numeric / t.首月::numeric) * 100,2)::text || '%' AS "4月后"
FROM(SELECT t.首次付费年月,
count(distinct case when t.年月差额 = 0 then t.uid else NULL end) AS 首月,
count(distinct case when t.年月差额 = 1 then t.uid else NULL end) AS "+1月",
count(distinct case when t.年月差额 = 2 then t.uid else NULL end) AS "+2月",
count(distinct case when t.年月差额 = 3 then t.uid else NULL end) AS "+3月",
count(distinct case when t.年月差额 = 4 then t.uid else NULL end) AS "+4月"
FROM (SELECT * FROM (SELECT *,round((to_date(t.年月,'YYYY-MM') - to_date(t.首次付费年月,'YYYY-MM')) / 30,0) AS 年月差额
FROM (SELECT
"日志".uid:: text,
to_char( to_date( "日志"."日期", 'YYYY-MM' ), 'YYYY-MM' ) AS 年月,
min(to_char( to_date( "日志"."日期", 'YYYY-MM' ), 'YYYY-MM' )) OVER(PARTITION BY "日志".uid) AS 首次付费年月
FROM
"日志"
GROUP BY
"日志".uid,
to_char( to_date( "日志"."日期", 'YYYY-MM' ), 'YYYY-MM' )
ORDER BY "日志".uid) AS t) AS t) AS t
GROUP BY t.首次付费年月) AS t;
- END -
对比Excel系列图书累积销量达15w册,让你轻松掌握数据分析技能,可以点击下方链接进行了解选购:
版权说明 : 本文为转载文章, 版权归原作者所有 版权申明
原文链接 : https://blog.csdn.net/junhongzhang/article/details/125512409
内容来源于网络,如有侵权,请联系作者删除!