[STARTERS 4기 TIL] #51일차(23.04.18)

 


📚 오늘의  학습

 

1. RFM 분석
1. RFM 분석

STEP 1️⃣   RFM 집계

STEP 2️⃣   단계 정의

STEP 3️⃣   RFM 점수 부여

📌 1️⃣ RFM 집계 📌

with cte_customers as (
select c.customer_id , c.company_name , c.contact_title , c.country , c.city 
	 , o.order_id , o.order_date 
	 , to_char(o.order_date, 'YYYY') as year
	 , to_char(o.order_date, 'mm') as month
	 , to_char(o.order_date, 'dd') as day
	 , to_char(o.order_date, 'q') as quarter
	 , od.product_id , od.unit_price , od.quantity , od.discount 
	 , od.unit_price * od.quantity * (1-od.discount) as amount
	 , p.product_name 
	 , c2.category_id , c2.category_name 
from customers c , orders o , order_details od , products p , categories c2 
where c.customer_id = o.customer_id 
  and o.order_id = od.order_id 
  and od.product_id = p.product_id 
  and p.category_id = c2.category_id 
 ) 
-- 1. 필요한 데이터
, cte_customer_maxo as (
select *, max(order_date) over() as maxo
from cte_customers
)
-- 2. RFM 산출
, cte_rfm as (
select customer_id 
	 , max(maxo) - max(order_date) as Recency
	 , count(distinct order_id) as Frequency
	 , sum(amount) as Monetary
from cte_customer_maxo
group by customer_id 
order by 2 , 3 desc, 4 desc
)
select *
from cte_rfm;

 

📌 2️⃣ 단계 정의 📌

, cte_ntile as (
select customer_id 
	 , recency, ntile(5) over(order by recency desc) as r
	 , frequency, ntile(5) over(order by frequency) as f
	 , monetary, ntile(5) over(order by monetary) as m
from cte_rfm
)

 

📌 3️⃣ RFM 점수 부여 📌

, cte_rfm_score as (
select *
	 , case 
	 	when recency <= 6 then 5
	 	when recency <= 15 then 4
	 	when recency <= 30 then 3
	 	when recency <= 70 then 2
	 	else 1
	 end as r
	 , case 
	 	when frequency >= 14 then 5
	 	when frequency >= 10 then 4
	 	when frequency >= 7 then 3
	 	when frequency >= 5 then 2
	 	else 1
	 end as f
	 , case 
	 	when monetary >= 22000 then 5
	 	when monetary >= 12000 then 4
	 	when monetary >= 5500 then 3
	 	when monetary >= 3000 then 2
	 	else 1
	 end as m
from cte_rfm
order by r desc, f desc, m desc
)

 

 

2. RFM 활용

✅ 고객 가치 산정

✅ 점수 별 고객 수

✅ 과거 우수 고객이었으나, 최근 구매하지 않은 고객

✅ 고객 분류

✅ 핵심 고객 구매 이력 추출

📌 ✅ 고객 가치 산정 📌

, cte_rfm_total_score as (
select customer_id , r, f, m, r+f+m as total_score
from cte_rfm_score
order by total_score desc
)

 

📌 ✅ 점수 별 고객 수 📌

, cte_score_cnt as (
select total_score, count(*) as cnt
from cte_rfm_total_score
group by 1
order by 1 desc
)

 

📌 ✅ 과거 우수 고객이었으나, 최근 구매하지 않은 고객 📌

, cte_f5m5 as (
select *
from cte_rfm_total_score
where r in (1,2,3,4) and f=5 and m=5
)

 

📌 ✅ 고객 분류 📌

, cte_rf_count as (
select r, f, count(*) as cnt
from cte_rfm_total_score
group by r, f
order by r desc, f desc
)
, cte_rf_count_pivot as (
select r
	 , coalesce(max(case when f = 5 then cnt end), 0) as f5
	 , coalesce(max(case when f = 4 then cnt end), 0) as f4
	 , coalesce(max(case when f = 3 then cnt end), 0) as f3
	 , coalesce(max(case when f = 2 then cnt end), 0) as f2
	 , coalesce(max(case when f = 1 then cnt end), 0) as f1
from cte_rf_count
group by r
order by r desc
)

 

📌 ✅ 핵심 고객 구매 이력 추출 📌

, cte_power_users_order  as (
select *
from cte_customers a1, cte_rfm_total_score a2
where a1.customer_id = a2.customer_id
  and a2.total_score = 15
order by a1.customer_id, order_id
)
select *
from cte_power_users_order;

 

 

2. 재구매율
1. 연도별 재구매율
-- 1. 고객, 구매연도를 중복되지 않게 불러온다.
 , cte_select as (
 select distinct customer_id, year 
 from cte_customers
 order by 1
 )
 -- 2. 다음 연도와 매칭되도록 self join
 , cte_select_next_year as (
 select a.customer_id, a.year, b.year as next_year
 from cte_select a left join cte_select b
   on a.customer_id = b.customer_id
 and a.year::int+1 = b.year::int
 )
 -- 3. 연도별 구매자 수 집계하여 재구매율 계산
 , cte_reorder_ratio as (
 select year
 	  , count(year) as 당해구매자수
 	  , count(next_year) as 다음해구매자수
 	  , round(count(next_year)::numeric / count(customer_id)::numeric *100, 2)||'%' as 재구매율
 from cte_select_next_year
 group by year
 )

 

2. 월별 재구매율

 

-- 1. 고객, 구매연도를 중복되지 않게 불러온다.
 , cte_select as (
 select distinct customer_id
 	  , date_trunc('month', order_date) as 구매월
 from cte_customers
 order by 1
 )
 -- 2. 다음 월과 매칭되도록 self join
 , cte_select_next_month as (
 select a.customer_id, a.구매월, b.구매월 as 다음월
 from cte_select a left join cte_select b
   on a.customer_id = b.customer_id
 and a.구매월+'1 month' = b.구매월
 )
 -- 3. 월별 구매자 수 집계하여 재구매율 계산
 , cte_reorder_ratio as (
 select to_char(구매월, 'YYYY-MM')
 	  , count(구매월) as 당해구매자수
 	  , count(다음월) as 다음해구매자수
 	  , round(count(다음월)::numeric / count(구매월)::numeric *100, 2)||'%' as 재구매율
 from cte_select_next_month
 group by 1
 order by 1
 )

 

 

3. 이탈 고객 분석
비활동 고객 전환 비율 (마지막 구매일 이후 90일 이상 경과한 고객의 비율) 
기준일 : order_date의 max값
-- 1. 고객별 최종 구매일 추춣
 , cte_customer_last_order as (
 select customer_id, max(order_date) as 최종구매일
 from cte_customers
 group by customer_id
  )
 --2. 경과일 계산
 , cte_date_calc as (
 select *, max(최종구매일) over() - 최종구매일 as 경과일
 from cte_customer_last_order
 )
 -- 3. 이탈여부
 , cte_out as (
 select *
 	  , case when 경과일>=90 then 1 else 0 end as 이탈여부
 from cte_date_calc
 )
 -- 4. 이탈률
 , cte_out_ratio as (
 select count(*) as 전체고객수
 	  , sum(이탈여부) as 이탈고객수
 	  , sum(이탈여부)/count(*)::numeric*100 as 이탈률
 from cte_out
 )
 -- 5. 이탈 고객 구매 이력 추출
 , cte_out_customers as (
 select *
 from cte_out a1, cte_customers a2
 where a1.customer_id = a2.customer_id
   and 이탈여부 = 1
 )

 

 

4. SQL 함수 정리

 

 

💻 오늘의  과제

제품의 연도별 재구매율(동일한 고객이 재구매)
with cte_customers as (
select c.customer_id , c.company_name , c.contact_title , c.country , c.city 
	 , o.order_id , o.order_date 
	 , to_char(o.order_date, 'YYYY') as year
	 , to_char(o.order_date, 'mm') as month
	 , to_char(o.order_date, 'dd') as day
	 , to_char(o.order_date, 'q') as quarter
	 , od.product_id , od.unit_price , od.quantity , od.discount 
	 , od.unit_price * od.quantity * (1-od.discount) as amount
	 , p.product_name 
	 , c2.category_id , c2.category_name 
from customers c , orders o , order_details od , products p , categories c2 
where c.customer_id = o.customer_id 
  and o.order_id = od.order_id 
  and od.product_id = p.product_id 
  and p.category_id = c2.category_id 
 ) 
 -- 1. 고객, 구매연도를 중복되지 않게 불러온다.
 , cte_select as (
 select distinct customer_id
 	  , product_name
 	  , date_trunc('year', order_date) as 구매연도
 from cte_customers
 order by 1
 )
 -- 2. 다음 연도와 매칭되도록 self join
 , cte_select_next_year as (
 select a.customer_id, a.product_name, a.구매연도, b.구매연도 as 다음연도
 from cte_select a left join cte_select b
   on a.customer_id = b.customer_id
 and a.product_name = b.product_name
 and a.구매연도+'1 year' = b.구매연도
 )
 -- 3. 연도별 구매자 수 집계하여 재구매율 계산
 , cte_reorder_ratio as (
 select product_name, to_char(구매연도, 'YYYY')
 	  , count(구매연도) as 당해구매자수
 	  , count(다음연도) as 다음해구매자수
 	  , round(count(다음연도)::numeric / count(구매연도)::numeric *100, 2)||'%' as 재구매율
 from cte_select_next_year
 group by 1,2
 order by 1,2
 )
 select * from cte_reorder_ratio;