📚 오늘의 학습
1. RFM 분석
1. RFM 분석
STEP 1️⃣ RFM 집계
STEP 2️⃣ 단계 정의
STEP 3️⃣ RFM 점수 부여
📌 1️⃣ RFM 집계 📌
with cte_customers as (
select c.customer_id , c.company_name , c.contact_title , c.country , c.city
, o.order_id , o.order_date
, to_char(o.order_date, 'YYYY') as year
, to_char(o.order_date, 'mm') as month
, to_char(o.order_date, 'dd') as day
, to_char(o.order_date, 'q') as quarter
, od.product_id , od.unit_price , od.quantity , od.discount
, od.unit_price * od.quantity * (1-od.discount) as amount
, p.product_name
, c2.category_id , c2.category_name
from customers c , orders o , order_details od , products p , categories c2
where c.customer_id = o.customer_id
and o.order_id = od.order_id
and od.product_id = p.product_id
and p.category_id = c2.category_id
)
-- 1. 필요한 데이터
, cte_customer_maxo as (
select *, max(order_date) over() as maxo
from cte_customers
)
-- 2. RFM 산출
, cte_rfm as (
select customer_id
, max(maxo) - max(order_date) as Recency
, count(distinct order_id) as Frequency
, sum(amount) as Monetary
from cte_customer_maxo
group by customer_id
order by 2 , 3 desc, 4 desc
)
select *
from cte_rfm;
📌 2️⃣ 단계 정의 📌
, cte_ntile as (
select customer_id
, recency, ntile(5) over(order by recency desc) as r
, frequency, ntile(5) over(order by frequency) as f
, monetary, ntile(5) over(order by monetary) as m
from cte_rfm
)
📌 3️⃣ RFM 점수 부여 📌
, cte_rfm_score as (
select *
, case
when recency <= 6 then 5
when recency <= 15 then 4
when recency <= 30 then 3
when recency <= 70 then 2
else 1
end as r
, case
when frequency >= 14 then 5
when frequency >= 10 then 4
when frequency >= 7 then 3
when frequency >= 5 then 2
else 1
end as f
, case
when monetary >= 22000 then 5
when monetary >= 12000 then 4
when monetary >= 5500 then 3
when monetary >= 3000 then 2
else 1
end as m
from cte_rfm
order by r desc, f desc, m desc
)
2. RFM 활용
✅ 고객 가치 산정
✅ 점수 별 고객 수
✅ 과거 우수 고객이었으나, 최근 구매하지 않은 고객
✅ 고객 분류
✅ 핵심 고객 구매 이력 추출
📌 ✅ 고객 가치 산정 📌
, cte_rfm_total_score as (
select customer_id , r, f, m, r+f+m as total_score
from cte_rfm_score
order by total_score desc
)
📌 ✅ 점수 별 고객 수 📌
, cte_score_cnt as (
select total_score, count(*) as cnt
from cte_rfm_total_score
group by 1
order by 1 desc
)
📌 ✅ 과거 우수 고객이었으나, 최근 구매하지 않은 고객 📌
, cte_f5m5 as (
select *
from cte_rfm_total_score
where r in (1,2,3,4) and f=5 and m=5
)
📌 ✅ 고객 분류 📌
, cte_rf_count as (
select r, f, count(*) as cnt
from cte_rfm_total_score
group by r, f
order by r desc, f desc
)
, cte_rf_count_pivot as (
select r
, coalesce(max(case when f = 5 then cnt end), 0) as f5
, coalesce(max(case when f = 4 then cnt end), 0) as f4
, coalesce(max(case when f = 3 then cnt end), 0) as f3
, coalesce(max(case when f = 2 then cnt end), 0) as f2
, coalesce(max(case when f = 1 then cnt end), 0) as f1
from cte_rf_count
group by r
order by r desc
)
📌 ✅ 핵심 고객 구매 이력 추출 📌
, cte_power_users_order as (
select *
from cte_customers a1, cte_rfm_total_score a2
where a1.customer_id = a2.customer_id
and a2.total_score = 15
order by a1.customer_id, order_id
)
select *
from cte_power_users_order;
2. 재구매율
1. 연도별 재구매율
-- 1. 고객, 구매연도를 중복되지 않게 불러온다.
, cte_select as (
select distinct customer_id, year
from cte_customers
order by 1
)
-- 2. 다음 연도와 매칭되도록 self join
, cte_select_next_year as (
select a.customer_id, a.year, b.year as next_year
from cte_select a left join cte_select b
on a.customer_id = b.customer_id
and a.year::int+1 = b.year::int
)
-- 3. 연도별 구매자 수 집계하여 재구매율 계산
, cte_reorder_ratio as (
select year
, count(year) as 당해구매자수
, count(next_year) as 다음해구매자수
, round(count(next_year)::numeric / count(customer_id)::numeric *100, 2)||'%' as 재구매율
from cte_select_next_year
group by year
)
2. 월별 재구매율
-- 1. 고객, 구매연도를 중복되지 않게 불러온다.
, cte_select as (
select distinct customer_id
, date_trunc('month', order_date) as 구매월
from cte_customers
order by 1
)
-- 2. 다음 월과 매칭되도록 self join
, cte_select_next_month as (
select a.customer_id, a.구매월, b.구매월 as 다음월
from cte_select a left join cte_select b
on a.customer_id = b.customer_id
and a.구매월+'1 month' = b.구매월
)
-- 3. 월별 구매자 수 집계하여 재구매율 계산
, cte_reorder_ratio as (
select to_char(구매월, 'YYYY-MM')
, count(구매월) as 당해구매자수
, count(다음월) as 다음해구매자수
, round(count(다음월)::numeric / count(구매월)::numeric *100, 2)||'%' as 재구매율
from cte_select_next_month
group by 1
order by 1
)
3. 이탈 고객 분석
비활동 고객 전환 비율 (마지막 구매일 이후 90일 이상 경과한 고객의 비율)
기준일 : order_date의 max값
-- 1. 고객별 최종 구매일 추춣
, cte_customer_last_order as (
select customer_id, max(order_date) as 최종구매일
from cte_customers
group by customer_id
)
--2. 경과일 계산
, cte_date_calc as (
select *, max(최종구매일) over() - 최종구매일 as 경과일
from cte_customer_last_order
)
-- 3. 이탈여부
, cte_out as (
select *
, case when 경과일>=90 then 1 else 0 end as 이탈여부
from cte_date_calc
)
-- 4. 이탈률
, cte_out_ratio as (
select count(*) as 전체고객수
, sum(이탈여부) as 이탈고객수
, sum(이탈여부)/count(*)::numeric*100 as 이탈률
from cte_out
)
-- 5. 이탈 고객 구매 이력 추출
, cte_out_customers as (
select *
from cte_out a1, cte_customers a2
where a1.customer_id = a2.customer_id
and 이탈여부 = 1
)
4. SQL 함수 정리
💻 오늘의 과제
제품의 연도별 재구매율(동일한 고객이 재구매)
with cte_customers as (
select c.customer_id , c.company_name , c.contact_title , c.country , c.city
, o.order_id , o.order_date
, to_char(o.order_date, 'YYYY') as year
, to_char(o.order_date, 'mm') as month
, to_char(o.order_date, 'dd') as day
, to_char(o.order_date, 'q') as quarter
, od.product_id , od.unit_price , od.quantity , od.discount
, od.unit_price * od.quantity * (1-od.discount) as amount
, p.product_name
, c2.category_id , c2.category_name
from customers c , orders o , order_details od , products p , categories c2
where c.customer_id = o.customer_id
and o.order_id = od.order_id
and od.product_id = p.product_id
and p.category_id = c2.category_id
)
-- 1. 고객, 구매연도를 중복되지 않게 불러온다.
, cte_select as (
select distinct customer_id
, product_name
, date_trunc('year', order_date) as 구매연도
from cte_customers
order by 1
)
-- 2. 다음 연도와 매칭되도록 self join
, cte_select_next_year as (
select a.customer_id, a.product_name, a.구매연도, b.구매연도 as 다음연도
from cte_select a left join cte_select b
on a.customer_id = b.customer_id
and a.product_name = b.product_name
and a.구매연도+'1 year' = b.구매연도
)
-- 3. 연도별 구매자 수 집계하여 재구매율 계산
, cte_reorder_ratio as (
select product_name, to_char(구매연도, 'YYYY')
, count(구매연도) as 당해구매자수
, count(다음연도) as 다음해구매자수
, round(count(다음연도)::numeric / count(구매연도)::numeric *100, 2)||'%' as 재구매율
from cte_select_next_year
group by 1,2
order by 1,2
)
select * from cte_reorder_ratio;
'STARTERS 4기 > [STARTERS] TIL' 카테고리의 다른 글
[STARTERS 4기 TIL] #53일차(23.04.20) (0) | 2023.04.23 |
---|---|
[STARTERS 4기 TIL] #52일차(23.04.19) (0) | 2023.04.23 |
[STARTERS 4기 TIL] #50일차(23.04.17) (0) | 2023.04.17 |
[STARTERS 4기 TIL] #49일차(23.04.14) (0) | 2023.04.14 |
[STARTERS 4기 TIL] #48일차(23.04.13) (0) | 2023.04.13 |