[STARTERS 4기 TIL] #53일차(23.04.20)


[SQL 미니 프로젝트]

2일차 : SQL 분석 실시

 

카테고리별 매출 상위 top3

1. 매출 top10 카테고리

 

----------- RFM
-- 📌 지표 및 우선순위
-- 1. Recency : 얼마나 최근에 구매?
-- 2. Frequency : 얼마나 빈번하게 구매?
-- 3. Monetary : 얼마나 많은 금액을 지불?

with cte_main as (
select distinct o.order_id
	 , c.customer_unique_id
	 , o.order_status
	 , date_trunc('day', o.order_purchase_timestamp) as order_date
	 , oi.price
from orders o
	 inner join customers c on o.customer_id = c.customer_id
	 inner join order_items oi on o.order_id = oi.order_id
	 left join products p on oi.product_id = p.product_id
	 left join product_category_name_translation pc on p.product_category_name = pc.product_category_name
	 left join reviews r on o.order_id = r.order_id
where order_status = 'delivered'
)
---------- 1. RFM 구하기 (사율) -----------
-- 1. r 구하기 위한 max order_date
, cte_customer_maxo as (
select *
	 , MAX(order_date) over() as maxo
from cte_main
)
-- 2. max(order_date) - order_date 테이블
, cte_rfm as (
select customer_unique_id
	 , max(maxo) - max(order_date) as Recency
	 , count(distinct order_id) as Frequency
	 , sum(price) as Monetary
from cte_customer_maxo
group by 1
order by 2 asc, 3 desc, 4 desc
)
-- 3. tmp
, cte_rfm_score as (
select customer_unique_id
	 , recency
	 , ntile(10) over (order by recency desc) as r
	 , frequency
	 , case 
	 	when frequency < 2 then 1
	 	when frequency = 2 then 2
	 	when frequency <= 4 then 3
	 	else 4
	 end as f
	 , monetary
	 , ntile(5) over (order by monetary asc) as m
from cte_rfm
order by r desc, f desc, m desc
)
-- 4. total_score
, cte_rfm_score01 as (
select customer_unique_id, r,f,m
from cte_rfm_score
)
-- 5. rfm_01
, cte_rfm_final as (
select customer_unique_id
	 , case when r >= 9 then 1 else 0 end recency
	 , case when f >= 2 then 1 else 0 end frequency
	 , case when m > 4 then 1 else 0 end monetary 
from cte_rfm_score01
)
-- 6. rfm_customer_status
, rfm_customer_status as (
select customer_unique_id, recency, frequency, monetary
	 , case 
		 when recency = 1 and frequency = 1 and monetary = 1 then 'VIP고객'
	 	 when recency = 1 and frequency = 1 and monetary = 0 then '충성고객'
	 	 when recency = 1 and frequency = 0 and monetary = 1 then '잠재VIP고객'
	 	 when recency = 1 and frequency = 0 and monetary = 0 then '잠재충성고객'
	 	 when recency = 0 and frequency = 1 and monetary = 1 then '놓치면안될고객'
	 	 when recency = 0 and frequency = 1 and monetary = 0 then '관심필요고객'
	 	 when recency = 0 and frequency = 0 and monetary = 1 then '이탈우려고객'
	 	 when recency = 0 and frequency = 0 and monetary = 0 then '겨울잠고객'
	 	 else null
	 end as 고객등급
from cte_rfm_final
order by 2 desc, 3 desc ,4 desc
)
---------------- 2. rfm + 매출 top10 카테고리 (민지) ---------------------
, cte_rfm_product as (
select pcnt.product_category_name_english
	 , oi.price
	 , a1.*
from rfm_customer_status a1
	, customers c2 
	, orders o
	, order_items oi
	, products p
	, product_category_name_translation pcnt
where a1.customer_unique_id = c2.customer_unique_id 
  and c2.customer_id = o.customer_id 
  and o.order_id = oi.order_id
  and oi.product_id = p.product_id 
  and p.product_category_name = pcnt.product_category_name 
 )
 -- 1. 필요 rfm만 고르기
, cte_rfm_product_vip as (
select *
from cte_rfm_product
where 고객등급 = 'VIP고객'
   or 고객등급 = '충성고객'
   or 고객등급 = '잠재VIP고객'
   or 고객등급 = '잠재충성고객'
   or 고객등급 = '놓치면안될고객'
)
-- 2. 매출 상위 top10 카테고리
, cte_category_sales as (  
select product_category_name_english , sum(price) as sales
from cte_rfm_product_vip 
group by product_category_name_english 
order by 2 desc
limit 10
)

2. 1에 속한 카테고리의 재구매율

---------------- 3. rfm + 매출 top10 카테고리 +카테고리 별 재구매율 (민지) ---------------------
, cte_top_sale_category as (
select distinct c3.customer_unique_id, a2.product_category_name_english
	, to_char(o2.order_purchase_timestamp, 'YYYY') as year 
from cte_category_sales a2
	, product_category_name_translation pcnt2 
	, products p2 
	, order_items oi2 
	, orders o2 
	, customers c3 
where a2.product_category_name_english = pcnt2.product_category_name_english 
  and pcnt2.product_category_name = p2.product_category_name
  and p2.product_id = oi2.product_id
  and o2.order_id = oi2.order_id  
  and c3.customer_id = o2.customer_id 
)
-- self join
, cte_join as (
select a.product_category_name_english, a.customer_unique_id, a.year, b.year as next_year
from cte_top_sale_category a left join cte_top_sale_category b
on a.product_category_name_english = b.product_category_name_english
and a.customer_unique_id = b.customer_unique_id
and a.year::int + 1 = b.year::int
)
, cte_reorder as (
select product_category_name_english, year
	,count(year) as 구매자수
	,count(next_year) as 재구매자수
	,round(count(next_year)/count(year)::numeric *100,2) as num
	,round(count(next_year)/count(year)::numeric *100,2)::varchar||'%' as 재구매율
from cte_join
group by 1, 2
order by 1, 2
)
select * from cte_reorder
order by 1,2 ;

 

재구매율 

1. 월별 재구매율

-- 월별 재구매율
with
cte_customers as (
select o.order_purchase_timestamp as order_date
	 , o.order_id
	 , op.payment_value
	 , c.customer_unique_id
from orders o , order_payments op , customers c
where o.order_id = op.order_id
  and o.customer_id = c.customer_id
)
, cte_select as (
select customer_unique_id , date_trunc('month', order_date) as month
from cte_customers
order by 2
)
, cte_join as (
select a.customer_unique_id, a.month, b.month as next_month
from cte_select a left join cte_select b
  on a.customer_unique_id = b.customer_unique_id
 and a.month + '1mon' = b.month
)
-- 월별 구매자수, 재구매자수, 재구매율
, cte_reorder as (
select to_char(month, 'YYYY-MM') as 월 
	 , count(month) as 구매자수 
     , count(next_month) as 재구매자수 
     , round(count(next_month)/count(month)::numeric *100,2)::varchar||'%' as 재구매율
from cte_join
group by 1
order by 1
)
select * from cte_reorder;

 

2. 카테고리별 연도별 재주문율

-- 연별 카테고리별 재주문율
with cte_customers as (
select o.order_purchase_timestamp as order_date
	 , o.order_id ,op.payment_value 
     , c.customer_unique_id 
     , p.product_category_name 
     , pcnt.product_category_name_english
from orders o , order_payments op , customers c , products p , order_items oi , product_category_name_translation pcnt
where o.order_id = op.order_id 
  and o.customer_id = c.customer_id 
  and o.order_id = oi.order_id 
  and oi.product_id = p.product_id 
  and p.product_category_name = pcnt.product_category_name
)
-- 연별 카테고리별 고객수 중복없는
, cte_select as (
select distinct customer_unique_id
	 , product_category_name_english 
     , to_char(order_date, 'YYYY') as year
from cte_customers 
)
-- self join , cte_join as (
select a.product_category_name_english
	 , a.customer_unique_id
     , a.year
     , b.year as next_year
from cte_select a left join cte_select b 
  on a.product_category_name_english = b.product_category_name_english 
 and a.customer_unique_id = b.customer_unique_id 
 and a.year::int + 1 = b.year::int 
)
, cte_reorder as (
select product_category_name_english
	 , year 
     , count(year) as 구매자수 
     , count(next_year) as 재구매자수 
     , round(count(next_year)/count(year)::numeric *100,2)::varchar||'%' as 재구매율
from cte_join
group by 1, 2
order by 1, 2
)
select *
from cte_reorder
order by 재구매자수 desc;

 

예상 배송기간 - 주문건수 간 상관관계 

-- 주문자와 판매자가 거리, 예상 배송기간, 주문건수 상관관계
with
cte_customers as (
select o.order_purchase_timestamp as 구매일시, o.order_id, o.order_estimated_delivery_date as 예상배송일시
	,op.payment_value
	,c.customer_unique_id
	,p.product_category_name , pcnt.product_category_name_english 
	,oi.price 
from orders o , order_payments op , customers c , products p , order_items oi , product_category_name_translation pcnt 
where o.order_id = op.order_id 
and o.customer_id = c.customer_id 
and o.order_id  = oi.order_id 
and oi.product_id = p.product_id 
and p.product_category_name = pcnt.product_category_name 
)
, cte_select as (
select to_char(예상배송일시-구매일시, 'dd') as 예상배송기간
	,count(distinct order_id) as 주문건수
from cte_customers
group by 1
order by 1
)
, cte_corr as (
select corr(예상배송기간::numeric , 주문건수)
from cte_select
where 예상배송기간::numeric <= 21
)
select * from cte_corr;
----------- RFM
-- 📌 지표 및 우선순위
-- 1. Recency : 얼마나 최근에 구매?
-- 2. Frequency : 얼마나 빈번하게 구매?
-- 3. Monetary : 얼마나 많은 금액을 지불?

with cte_main as (
select distinct o.order_id
	 , c.customer_unique_id
	 , o.order_status
	 , date_trunc('day', o.order_purchase_timestamp) as order_date
	 , oi.price
	 , pc.product_category_name_english
from orders o
	 inner join customers c on o.customer_id = c.customer_id
	 inner join order_items oi on o.order_id = oi.order_id
	 left join products p on oi.product_id = p.product_id
	 left join product_category_name_translation pc on p.product_category_name = pc.product_category_name
	 left join reviews r on o.order_id = r.order_id
where order_status = 'delivered'
)
---------- RFM -----------
-- 1. r 구하기 위한 max order_date
, cte_customer_maxo as (
select *
	 , MAX(order_date) over() as maxo
from cte_main
)
-- 2. max(order_date) - order_date 테이블
, cte_rfm as (
select customer_unique_id
	 , max(maxo) - max(order_date) as Recency
	 , count(distinct order_id) as Frequency
	 , sum(price) as Monetary
from cte_customer_maxo
group by 1
order by 2 asc, 3 desc, 4 desc
)
-- 3. tmp
, cte_rfm_score as (
select customer_unique_id
	 , recency
	 , ntile(10) over (order by recency desc) as r
	 , frequency
	 , case 
	 	when frequency < 2 then 1
	 	when frequency = 2 then 2
	 	when frequency <= 4 then 3
	 	else 4
	 end as f
	 , monetary
	 , ntile(5) over (order by monetary ) as m
from cte_rfm
order by r desc, f desc, m desc
)
-- 4. total_score
, cte_rfm_score01 as (
select customer_unique_id, r,f,m
from cte_rfm_score
)
-- 5. rfm_01
, cte_rfm_final as (
select customer_unique_id
	 , case when r >= 9 then 1 else 0 end recency
	 , case when f >= 2 then 1 else 0 end frequency
	 , case when m > 4 then 1 else 0 end monetary 
from cte_rfm_score01
)
-- 6. rfm_customer_status
, rfm_customer_status as (
select customer_unique_id, recency, frequency, monetary
	 , case 
		 when recency = 1 and frequency = 1 and monetary = 1 then 'VIP고객'
	 	 when recency = 1 and frequency = 1 and monetary = 0 then '충성고객'
	 	 when recency = 1 and frequency = 0 and monetary = 1 then '잠재VIP고객'
	 	 when recency = 1 and frequency = 0 and monetary = 0 then '잠재충성고객'
	 	 when recency = 0 and frequency = 1 and monetary = 1 then '놓치면안될고객'
	 	 when recency = 0 and frequency = 1 and monetary = 0 then '관심필요고객'
	 	 when recency = 0 and frequency = 0 and monetary = 1 then '이탈우려고객'
	 	 when recency = 0 and frequency = 0 and monetary = 0 then '겨울잠고객'
	 	 else null
	 end as 고객상태
from cte_rfm_final
order by 2 desc, 3 desc ,4 desc
)
, cte_customer as (
select 고객상태
	 , recency
	 , frequency
	 , monetary
	 , count(customer_unique_id) as customer_cnt
from rfm_customer_status
group by 1,2,3,4
order by 2 desc, 3 desc,4 desc
)
, cte_customer_info as (
select a.customer_unique_id, a.고객상태, b.price, b.product_category_name_english, b.order_date
from rfm_customer_status a , cte_main b
where a.customer_unique_id = b.customer_unique_id
and a.고객상태 = 'VIP고객' or a.고객상태 = '충성고객' or a.고객상태 = '잠재VIP고객' or a.고객상태 = '잠재충성고객' or a.고객상태 = '놓치면안될고객'
)
-- 연별 카테고리별 고객수 중복없는
, cte_select as (
select  distinct customer_unique_id, product_category_name_english
	, to_char(order_date, 'YYYY') as year 
from cte_customer_info
)
select * from cte_select;
-- self join
, cte_join as (
select a.product_category_name_english, a.customer_unique_id, a.year, b.year as next_year
from cte_select a left join cte_select b
on a.product_category_name_english = b.product_category_name_english
and a.customer_unique_id = b.customer_unique_id
and a.year::int + 1 = b.year::int
)
, cte_reorder as (
select product_category_name_english, year
	,count(year) as 구매자수
	,count(next_year) as 재구매자수
	,round(count(next_year)/count(year)::numeric *100,2)::varchar||'%' as 재구매율
from cte_join
group by 1, 2
order by 1, 2
)
select * from cte_reorder
order by 재구매자수 desc;

 

 

TOP 10 카테고리가 잘 팔리는 도시 TOP 3

---------------- 3. 지역분석 (지수) ---------------------
, cte_city_category as (
select a.product_category_name_english, b.customer_city, count(*) as 구매횟수
from cte_category_sales a, cte_rfm_product_vip b
where a.product_category_name_english = b.product_category_name_english
group by 1,2
order by 1, 3 desc
)
, cte_city_category_rank as (
select *
	, row_number() over(partition by product_category_name_english order by 구매횟수 desc) as rank
from cte_city_category
)
select product_category_name_english
	, max(case when rank=1 then customer_city end) as "1위"
	, max(case when rank=2 then customer_city end) as "2위"
	, max(case when rank=3 then customer_city end) as "3위"
from cte_city_category_rank
group by 1;