[SQL 미니 프로젝트]
2일차 : SQL 분석 실시
카테고리별 매출 상위 top3
1. 매출 top10 카테고리
----------- RFM
-- 📌 지표 및 우선순위
-- 1. Recency : 얼마나 최근에 구매?
-- 2. Frequency : 얼마나 빈번하게 구매?
-- 3. Monetary : 얼마나 많은 금액을 지불?
with cte_main as (
select distinct o.order_id
, c.customer_unique_id
, o.order_status
, date_trunc('day', o.order_purchase_timestamp) as order_date
, oi.price
from orders o
inner join customers c on o.customer_id = c.customer_id
inner join order_items oi on o.order_id = oi.order_id
left join products p on oi.product_id = p.product_id
left join product_category_name_translation pc on p.product_category_name = pc.product_category_name
left join reviews r on o.order_id = r.order_id
where order_status = 'delivered'
)
---------- 1. RFM 구하기 (사율) -----------
-- 1. r 구하기 위한 max order_date
, cte_customer_maxo as (
select *
, MAX(order_date) over() as maxo
from cte_main
)
-- 2. max(order_date) - order_date 테이블
, cte_rfm as (
select customer_unique_id
, max(maxo) - max(order_date) as Recency
, count(distinct order_id) as Frequency
, sum(price) as Monetary
from cte_customer_maxo
group by 1
order by 2 asc, 3 desc, 4 desc
)
-- 3. tmp
, cte_rfm_score as (
select customer_unique_id
, recency
, ntile(10) over (order by recency desc) as r
, frequency
, case
when frequency < 2 then 1
when frequency = 2 then 2
when frequency <= 4 then 3
else 4
end as f
, monetary
, ntile(5) over (order by monetary asc) as m
from cte_rfm
order by r desc, f desc, m desc
)
-- 4. total_score
, cte_rfm_score01 as (
select customer_unique_id, r,f,m
from cte_rfm_score
)
-- 5. rfm_01
, cte_rfm_final as (
select customer_unique_id
, case when r >= 9 then 1 else 0 end recency
, case when f >= 2 then 1 else 0 end frequency
, case when m > 4 then 1 else 0 end monetary
from cte_rfm_score01
)
-- 6. rfm_customer_status
, rfm_customer_status as (
select customer_unique_id, recency, frequency, monetary
, case
when recency = 1 and frequency = 1 and monetary = 1 then 'VIP고객'
when recency = 1 and frequency = 1 and monetary = 0 then '충성고객'
when recency = 1 and frequency = 0 and monetary = 1 then '잠재VIP고객'
when recency = 1 and frequency = 0 and monetary = 0 then '잠재충성고객'
when recency = 0 and frequency = 1 and monetary = 1 then '놓치면안될고객'
when recency = 0 and frequency = 1 and monetary = 0 then '관심필요고객'
when recency = 0 and frequency = 0 and monetary = 1 then '이탈우려고객'
when recency = 0 and frequency = 0 and monetary = 0 then '겨울잠고객'
else null
end as 고객등급
from cte_rfm_final
order by 2 desc, 3 desc ,4 desc
)
---------------- 2. rfm + 매출 top10 카테고리 (민지) ---------------------
, cte_rfm_product as (
select pcnt.product_category_name_english
, oi.price
, a1.*
from rfm_customer_status a1
, customers c2
, orders o
, order_items oi
, products p
, product_category_name_translation pcnt
where a1.customer_unique_id = c2.customer_unique_id
and c2.customer_id = o.customer_id
and o.order_id = oi.order_id
and oi.product_id = p.product_id
and p.product_category_name = pcnt.product_category_name
)
-- 1. 필요 rfm만 고르기
, cte_rfm_product_vip as (
select *
from cte_rfm_product
where 고객등급 = 'VIP고객'
or 고객등급 = '충성고객'
or 고객등급 = '잠재VIP고객'
or 고객등급 = '잠재충성고객'
or 고객등급 = '놓치면안될고객'
)
-- 2. 매출 상위 top10 카테고리
, cte_category_sales as (
select product_category_name_english , sum(price) as sales
from cte_rfm_product_vip
group by product_category_name_english
order by 2 desc
limit 10
)
2. 1에 속한 카테고리의 재구매율
---------------- 3. rfm + 매출 top10 카테고리 +카테고리 별 재구매율 (민지) ---------------------
, cte_top_sale_category as (
select distinct c3.customer_unique_id, a2.product_category_name_english
, to_char(o2.order_purchase_timestamp, 'YYYY') as year
from cte_category_sales a2
, product_category_name_translation pcnt2
, products p2
, order_items oi2
, orders o2
, customers c3
where a2.product_category_name_english = pcnt2.product_category_name_english
and pcnt2.product_category_name = p2.product_category_name
and p2.product_id = oi2.product_id
and o2.order_id = oi2.order_id
and c3.customer_id = o2.customer_id
)
-- self join
, cte_join as (
select a.product_category_name_english, a.customer_unique_id, a.year, b.year as next_year
from cte_top_sale_category a left join cte_top_sale_category b
on a.product_category_name_english = b.product_category_name_english
and a.customer_unique_id = b.customer_unique_id
and a.year::int + 1 = b.year::int
)
, cte_reorder as (
select product_category_name_english, year
,count(year) as 구매자수
,count(next_year) as 재구매자수
,round(count(next_year)/count(year)::numeric *100,2) as num
,round(count(next_year)/count(year)::numeric *100,2)::varchar||'%' as 재구매율
from cte_join
group by 1, 2
order by 1, 2
)
select * from cte_reorder
order by 1,2 ;
재구매율
1. 월별 재구매율
-- 월별 재구매율
with
cte_customers as (
select o.order_purchase_timestamp as order_date
, o.order_id
, op.payment_value
, c.customer_unique_id
from orders o , order_payments op , customers c
where o.order_id = op.order_id
and o.customer_id = c.customer_id
)
, cte_select as (
select customer_unique_id , date_trunc('month', order_date) as month
from cte_customers
order by 2
)
, cte_join as (
select a.customer_unique_id, a.month, b.month as next_month
from cte_select a left join cte_select b
on a.customer_unique_id = b.customer_unique_id
and a.month + '1mon' = b.month
)
-- 월별 구매자수, 재구매자수, 재구매율
, cte_reorder as (
select to_char(month, 'YYYY-MM') as 월
, count(month) as 구매자수
, count(next_month) as 재구매자수
, round(count(next_month)/count(month)::numeric *100,2)::varchar||'%' as 재구매율
from cte_join
group by 1
order by 1
)
select * from cte_reorder;
2. 카테고리별 연도별 재주문율
-- 연별 카테고리별 재주문율
with cte_customers as (
select o.order_purchase_timestamp as order_date
, o.order_id ,op.payment_value
, c.customer_unique_id
, p.product_category_name
, pcnt.product_category_name_english
from orders o , order_payments op , customers c , products p , order_items oi , product_category_name_translation pcnt
where o.order_id = op.order_id
and o.customer_id = c.customer_id
and o.order_id = oi.order_id
and oi.product_id = p.product_id
and p.product_category_name = pcnt.product_category_name
)
-- 연별 카테고리별 고객수 중복없는
, cte_select as (
select distinct customer_unique_id
, product_category_name_english
, to_char(order_date, 'YYYY') as year
from cte_customers
)
-- self join , cte_join as (
select a.product_category_name_english
, a.customer_unique_id
, a.year
, b.year as next_year
from cte_select a left join cte_select b
on a.product_category_name_english = b.product_category_name_english
and a.customer_unique_id = b.customer_unique_id
and a.year::int + 1 = b.year::int
)
, cte_reorder as (
select product_category_name_english
, year
, count(year) as 구매자수
, count(next_year) as 재구매자수
, round(count(next_year)/count(year)::numeric *100,2)::varchar||'%' as 재구매율
from cte_join
group by 1, 2
order by 1, 2
)
select *
from cte_reorder
order by 재구매자수 desc;
예상 배송기간 - 주문건수 간 상관관계
-- 주문자와 판매자가 거리, 예상 배송기간, 주문건수 상관관계
with
cte_customers as (
select o.order_purchase_timestamp as 구매일시, o.order_id, o.order_estimated_delivery_date as 예상배송일시
,op.payment_value
,c.customer_unique_id
,p.product_category_name , pcnt.product_category_name_english
,oi.price
from orders o , order_payments op , customers c , products p , order_items oi , product_category_name_translation pcnt
where o.order_id = op.order_id
and o.customer_id = c.customer_id
and o.order_id = oi.order_id
and oi.product_id = p.product_id
and p.product_category_name = pcnt.product_category_name
)
, cte_select as (
select to_char(예상배송일시-구매일시, 'dd') as 예상배송기간
,count(distinct order_id) as 주문건수
from cte_customers
group by 1
order by 1
)
, cte_corr as (
select corr(예상배송기간::numeric , 주문건수)
from cte_select
where 예상배송기간::numeric <= 21
)
select * from cte_corr;
----------- RFM
-- 📌 지표 및 우선순위
-- 1. Recency : 얼마나 최근에 구매?
-- 2. Frequency : 얼마나 빈번하게 구매?
-- 3. Monetary : 얼마나 많은 금액을 지불?
with cte_main as (
select distinct o.order_id
, c.customer_unique_id
, o.order_status
, date_trunc('day', o.order_purchase_timestamp) as order_date
, oi.price
, pc.product_category_name_english
from orders o
inner join customers c on o.customer_id = c.customer_id
inner join order_items oi on o.order_id = oi.order_id
left join products p on oi.product_id = p.product_id
left join product_category_name_translation pc on p.product_category_name = pc.product_category_name
left join reviews r on o.order_id = r.order_id
where order_status = 'delivered'
)
---------- RFM -----------
-- 1. r 구하기 위한 max order_date
, cte_customer_maxo as (
select *
, MAX(order_date) over() as maxo
from cte_main
)
-- 2. max(order_date) - order_date 테이블
, cte_rfm as (
select customer_unique_id
, max(maxo) - max(order_date) as Recency
, count(distinct order_id) as Frequency
, sum(price) as Monetary
from cte_customer_maxo
group by 1
order by 2 asc, 3 desc, 4 desc
)
-- 3. tmp
, cte_rfm_score as (
select customer_unique_id
, recency
, ntile(10) over (order by recency desc) as r
, frequency
, case
when frequency < 2 then 1
when frequency = 2 then 2
when frequency <= 4 then 3
else 4
end as f
, monetary
, ntile(5) over (order by monetary ) as m
from cte_rfm
order by r desc, f desc, m desc
)
-- 4. total_score
, cte_rfm_score01 as (
select customer_unique_id, r,f,m
from cte_rfm_score
)
-- 5. rfm_01
, cte_rfm_final as (
select customer_unique_id
, case when r >= 9 then 1 else 0 end recency
, case when f >= 2 then 1 else 0 end frequency
, case when m > 4 then 1 else 0 end monetary
from cte_rfm_score01
)
-- 6. rfm_customer_status
, rfm_customer_status as (
select customer_unique_id, recency, frequency, monetary
, case
when recency = 1 and frequency = 1 and monetary = 1 then 'VIP고객'
when recency = 1 and frequency = 1 and monetary = 0 then '충성고객'
when recency = 1 and frequency = 0 and monetary = 1 then '잠재VIP고객'
when recency = 1 and frequency = 0 and monetary = 0 then '잠재충성고객'
when recency = 0 and frequency = 1 and monetary = 1 then '놓치면안될고객'
when recency = 0 and frequency = 1 and monetary = 0 then '관심필요고객'
when recency = 0 and frequency = 0 and monetary = 1 then '이탈우려고객'
when recency = 0 and frequency = 0 and monetary = 0 then '겨울잠고객'
else null
end as 고객상태
from cte_rfm_final
order by 2 desc, 3 desc ,4 desc
)
, cte_customer as (
select 고객상태
, recency
, frequency
, monetary
, count(customer_unique_id) as customer_cnt
from rfm_customer_status
group by 1,2,3,4
order by 2 desc, 3 desc,4 desc
)
, cte_customer_info as (
select a.customer_unique_id, a.고객상태, b.price, b.product_category_name_english, b.order_date
from rfm_customer_status a , cte_main b
where a.customer_unique_id = b.customer_unique_id
and a.고객상태 = 'VIP고객' or a.고객상태 = '충성고객' or a.고객상태 = '잠재VIP고객' or a.고객상태 = '잠재충성고객' or a.고객상태 = '놓치면안될고객'
)
-- 연별 카테고리별 고객수 중복없는
, cte_select as (
select distinct customer_unique_id, product_category_name_english
, to_char(order_date, 'YYYY') as year
from cte_customer_info
)
select * from cte_select;
-- self join
, cte_join as (
select a.product_category_name_english, a.customer_unique_id, a.year, b.year as next_year
from cte_select a left join cte_select b
on a.product_category_name_english = b.product_category_name_english
and a.customer_unique_id = b.customer_unique_id
and a.year::int + 1 = b.year::int
)
, cte_reorder as (
select product_category_name_english, year
,count(year) as 구매자수
,count(next_year) as 재구매자수
,round(count(next_year)/count(year)::numeric *100,2)::varchar||'%' as 재구매율
from cte_join
group by 1, 2
order by 1, 2
)
select * from cte_reorder
order by 재구매자수 desc;
TOP 10 카테고리가 잘 팔리는 도시 TOP 3
---------------- 3. 지역분석 (지수) ---------------------
, cte_city_category as (
select a.product_category_name_english, b.customer_city, count(*) as 구매횟수
from cte_category_sales a, cte_rfm_product_vip b
where a.product_category_name_english = b.product_category_name_english
group by 1,2
order by 1, 3 desc
)
, cte_city_category_rank as (
select *
, row_number() over(partition by product_category_name_english order by 구매횟수 desc) as rank
from cte_city_category
)
select product_category_name_english
, max(case when rank=1 then customer_city end) as "1위"
, max(case when rank=2 then customer_city end) as "2위"
, max(case when rank=3 then customer_city end) as "3위"
from cte_city_category_rank
group by 1;
'STARTERS 4기 > [STARTERS] TIL' 카테고리의 다른 글
[STARTERS 4기 TIL] #54일차(23.04.21) (0) | 2023.04.23 |
---|---|
[STARTERS 4기 TIL] #52일차(23.04.19) (0) | 2023.04.23 |
[STARTERS 4기 TIL] #51일차(23.04.18) (0) | 2023.04.18 |
[STARTERS 4기 TIL] #50일차(23.04.17) (0) | 2023.04.17 |
[STARTERS 4기 TIL] #49일차(23.04.14) (0) | 2023.04.14 |