tak's data blog
빠르게 보는 SQL 레시피 본문
SQL 레시피 책을 공부하면서 현업에서 쓰일법한 코드들을 타이핑 해두었고,
빠르게 필요할 때마다 보기 위해서 다음과 같이 정리해두었다.
한번 훑어보긴 했지만 아직 직접 적용해보지 못한 코드가 있어서 추후에 다시 작성해야겠다.
-- 사용자의 방문 빈도 집계하기
WITH
action_log_with_dt AS
(
SELECT *
,SUBSTR(STAMP, 1, 10) AS dt
FROM action_log
)
,action_day_count_per_user AS
(
SELECT
user_id
,COUNT(DISTINCT dt) AS action_day_count
FROM
action_log_with_dt
WHERE
dt BETWEEN '2016-11-01' AND '2016-11-07'
GROUP BY user_id
)
SELECT
action_day_count
,COUNT(DISTINCT user_id) AS user_count
FROM action_day_count_per_user
GROUP BY action_day_count
ORDER BY action_day_count;
-- 구매액이 많은 순서로 사용자 그룹을 10등분하는 쿼리
WITH user_purchase_amount AS
(
SELECT
user_id
,SUM(amount) AS purchase_amount
FROM action_log
WHERE action = 'purchase'
GROUP BY user_id
)
, users_with_decile AS
(
SELECT
user_id
,purchase_amount
,ntile(10) OVER (ORDER BY purchase_amount DESC) AS decile
FROM user_purchase_amount
)
SELECT *
FROM users_with_decile
-- 10분할한 Decile들을 집계하는 쿼리
WITH user_purchase_amount AS
(
SELECT
user_id
,SUM(amount) AS purchase_amount
FROM action_log
WHERE action = 'purchase'
GROUP BY user_id
)
, users_with_decile AS
(
SELECT
user_id
,purchase_amount
,ntile(10) OVER (ORDER BY purchase_amount DESC) AS decile
FROM user_purchase_amount
)
, decile_with_purchase_amount AS
(
SELECT
decile
,SUM(purchase_amount) AS amount
,AVG(purchase_amount) AS avg_amount
,SUM(SUM(purchase_amount)) OVER (ORDER BY decile) AS cumulative_amount
,SUM(SUM(purchase_amount)) OVER () AS total_amount
FROM users_with_decile
GROUP BY decile
)
SELECT *
FROM decile_with_purchase_amount
-- 사용자별로 RFM을 집계하는 쿼리
WITH purchase_log AS
(
SELECT
user_id
,amount
,substring(stamp,1,10) AS dt
FROM action_log
WHERE action = 'purchase'
),
user_rfm AS
(
SELECT
user_id
,MAx(dt) AS recent_date
,date_diff(CURRENT_DATE, date(timestamp(MAX(dt))), day) AS recency
,COUNT(dt) AS frequency
,SUM(amount) AS monetary
FROM purchase_log
GROUP BY user_id
)
SELECT *
FROM user_rfm
-- 사용자들의 RFM 랭크를 계산하는 쿼리
WITH user_rfm AS
(
-- 위 코드 참고
),
user_rfm_rank AS
(
SELECT
user_id
,recent_date
,recency
,frequency
,monetary
,CASE WHEN recency < 14 THEN 5
WHEN recency < 28 THEN 4
WHEN recency < 60 THEN 3
WHEN recency < 90 THEN 2
ELSE 1 END AS r
,CASE WHEN 20 <= frequency THEN 5
WHEN 10 <= frequency THEN 4
WHEN 5 <= frequency THEN 3
WHEN 2 <= frequency THEN 2
WHEN 1 = frequency THEN 1 END AS f
,CASE WHEN 300000 <= monetary THEN 5
WHEN 100000 <= frequency THEN 4
WHEN 300000 <= frequency THEN 3
WHEN 5000 <= frequency THEN 2
ELSE 1 END AS m
FROM user_rfm
)
SELECT *
FROM user_rfm_rank
-- 각 그룹의 사람 수를 확인하는 쿼리
WITH user_rfm AS
(
-- 위 코드 참고
),
user_rfm_rank AS
(
-- 위 코드 참고
),
mst_rfm_index AS
(
SELECT 1 AS rfm_index
UNION ALL SELECT 2 AS rfm_index
UNION ALL SELECT 3 AS rfm_index
UNION ALL SELECT 4 AS rfm_index
UNION ALL SELECT 5 AS rfm_index
),
rfm_flag AS (
SELECT
m.rfm_index
,CASE WHEN m.rfm_index = r.r THEN 1 ELSE 0 END AS r_flag
,CASE WHEN m.rfm_index = r.f THEN 1 ELSE 0 END AS f_flag
,CASE WHEN m.rfm_index = r.m THEN 1 ELSE 0 END AS m_flag
FROM mst_rfm_index AS m CROSS JOIN user_rfm_rank AS r
)
SELECT
rfm_index
,SUM(r_falg) AS r
,SUM(f_flag) AS f
,SUM(m_flag) AS m
FROM rfm_flag
GROUP BY rfm_index
ORDER BY rfm_index DESC
-- 통합 랭크를 계산하는 쿼리
WITH user_rfm AS
(
-- 위 코드 참고
),
user_rfm_rank AS
(
-- 위 코드 참고
)
SELECT
r+f+m AS total_rank
,r,f,m
,COUNT(user_id)
FROM user_rfm_rank
GROUP BY r,f,m
ORDER BY total_rank DESC, r DESC, f DESC, m DESC
-- 종합 랭크별로 사용자 수를 집계하는 쿼리
WITH user_rfm AS
(
-- 위 코드 참고
),
user_rfm_rank AS
(
-- 위 코드 참고
)
SELECT
r+f+m AS total_rank
,COUNT(user_id)
FROM user_rfm_rank
GROUP BY total_rank
ORDER BY total_rank DESC
-- R과 F를 사영해 2차원 사용자 층의 사용자 수를 집계하는 쿼리 (p.230)
WITH user_rfm AS
(
-- 위 코드 참고
),
user_rfm_rank AS
(
-- 위 코드 참고
)
SELECT
CONCAT('r_', r) AS r_rank
,COUNT(CASE WHEN f = 5 THEN 1 END) AS f_5
,COUNT(CASE WHEN f = 4 THEN 1 END) AS f_4
,COUNT(CASE WHEN f = 3 THEN 1 END) AS f_3
,COUNT(CASE WHEN f = 2 THEN 1 END) AS f_2
,COUNT(CASE WHEN f = 1 THEN 1 END) AS f_1
FROM user_rfm_rank
GROUP BY r
ORDER BY r_rank DESC
-- 매달 등록 수와 전월비를 계산하는 쿼리
WITH mst_users_with_year_month AS
(
SELECT
*
,substring(register_date,1,7) AS year_month
FROM mst_users
)
SELECT
year_month
,COUNT(DISTINCT user_id) AS register_count
,LAG(COUNT(DISTINCT user_id)) OVER (ORDER BY year_month) AS last_month_count
,1.0*COUNT(DISTINCT user_id)/LAG(COUNT(DISTINCT user_id)) OVER(ORDER BY year_month) AS month_over_month_ratio
FROM mst_users_with_year_month
GROUP BY year_month
-- 디바이스들의 등록 수를 집계하는 쿼리
WITH mst_users_with_year_month AS
(
-- 위 코드 참고
)
SELECT
year_month
,COUNT(DISTINCT user_id) AS register_count
,COUNT(DISTINCT CASE WHEN register_device = 'pc' THEN user_id END) AS register_pc
,COUNT(DISTINCT CASE WHEN register_device = 'sp' THEN user_id END) AS register_sp
,COUNT(DISTINCT CASE WHEN register_device = 'app' THEN user_id END) AS register_app
FROM mst_users_with_year_month
GROUP BY year_month
-- '로그 최근 일자'와 '사용자별 등록일의 다음날'을 계산하는 쿼리
WITH action_log_with_mst_users AS
(
SELECT
u.user_id
,u.register_date
,CAST(a.stamp AS date) AS action_date
,MAX(CAST(a.stamp AS date)) OVER() AS latest_date
,CAST(u.register_date::date + '1 day'::interval AS date) AS next_day_1
FROM mst_users AS u LEFT OUTER JOIN action_log AS a ON u.user_id = a.user_id
)
WITH action_log_with_mst_users AS
(
SELECT
u.user_id
,u.register_date
,CAST(a.stamp AS date) AS action_date
,MAX(CAST(a.stamp AS date)) OVER() AS latest_date
,CAST(u.register_date::date + '1 day'::interval AS date) AS next_day_1
FROM mst_users AS u LEFT OUTER JOIN action_log AS a ON u.user_id = a.user_id
)
SELECT *
FROM action_log_with_mst_users
ORDER BY register_date
-- 사용자의 액션 플래그를 계산하는 쿼리
WITH action_log_with_mst_users AS
(
-- 위 코드 참고
)
, user_action_flag AS
(
SELECT
user_id
,register_date
,SIGN(SUM(CASE WHEN next_day_1 <= latest_date THEN
CASE WHEN next_day_1 = action_date THEN 1 ELSE 0 END END)) AS next_1_day_action
FROM action_log_with_mst_users
GROUP BY user_id, register_date
)
SELECT *
FROM user_action_flag
ORDER BY register_date, user_id
-- 다음날 지속률을 계산하는 쿼리
WITH action_log_with_mst_users AS
(
-- 위 코드 참고
)
, user_action_flag AS
(
-- 위 코드 참고
)
SELECT
register_date
,AVG(100.0*next_1_day_action) AS repeat_rate_1_day
FROM user_action_flag
GROUP BY register_date
ORDER BY register_date
-- 지속률 지표를 관리하는 마스터 테이블을 작성하는 쿼리
WITH repeat_interval(index_name, interval_date) AS
(
VALUES
('01 day repeat', 1)
,('02 day repeat', 2)
,('03 day repeat', 3)
,('04 day repeat', 4)
,('05 day repeat', 5)
,('06 day repeat', 6)
,('07 day repeat', 7)
)
SELECT *
FROM repeat_interval
ORDER BY index_name
-- 지속률을 세로 기반으로 집계하는 쿼리
WITH repeat_interval AS(
-- 위 코드 참고하기
)
,action_log_with_index_date AS
(
SELECT
u.user_id
,u.register_date
,CAST(a.stamp AS date) AS action_date
,MAX(CAST(a.stamp AS date)) OVER() AS latest_date
,r.index_name
,CAST(CAST(u.register_date AS date) + interval '1 day' * r.interval_date AS date) AS index_date
FROM mst_users AS u LEFT OUTER JOIN action_log AS a ON u.user_id = a.user_id
CROSS JOIN repeat_interval AS r
),
user_action_flag AS
(
SELECT
user_id
,register_date
,index_name
,SIGN(
SUM(
CASE WHEN index_date <= latest_date THEN
CASE WHEN index_date = action_date THEN 1 ELSE 0 END END
)) AS index_date_action
FROM action_log_with_index_date
GROUP BY user_id, register_date, index_name, index_date
)
SELECT
register_date
,index_name
,AVG(100.0*index_date_action) AS repeat_rate
FROM user_action_flag
GROUP BY register_date, index_name
ORDER BY register_date, index_name
-- 12개월 후까지의 월을 도출하기 위한 보조 테이블을 만드는 쿼리
-- 등록 월에서 12개월 후까지의 잔존율을 집계하는 쿼리
WITH mst_intervals(interval_month) AS
(
VALUES (1), (2), (3), (4), (5), (6)
, (7), (8), (9), (10), (11), (12)
),
mst_users_with_index_month AS
(
SELECT
u.user_id
,u.register_date
,CAST(u.register_date::date + i.interval_month * '1 month' :: interval AS date) AS index_date
,substring(u.register_date,1,7) AS register_month
,substring(CAST(u.register_date::date + i.interval_month * '1 month'::interval AS text), 1, 7) AS index_month
FROM mst_users AS u CROSS JOIN mst_intervals AS i
),
action_log_in_month AS
(
SELECT DISTINCT user_id
,substring(stamp,1,7) AS action_month
FROM action_log
)
SELECT
u.register_month
,u.index_month
,SUM(CASE WHEN a.action_month IS NOT NULL THEN 1 ELSE 0 END) AS users
,AVG(CASE WHEN a.action_month IS NOT NULL THEN 100.0 ELSE 0.0 END) AS retention_rate
FROM mst_users_with_index_month AS u
LEFT JOIN
action_log_in_month AS a ON u.user_id = a.user_id AND u.index_month = a.action_month
GROUP BY u.register_month, u.index_month
ORDER BY u.register_month, u.index_month
-- 신규 사용자 수, 리피트 사용자 수, 컴백 사용자 수를 집계하는 쿼리
WITH monthly_user_action AS (
-- 월별 사용자 액션 집약하기
SELECT
DISTINCT u.user_id
,SUBSTRING(u.register_date, 1, 7) AS register_month
,SUBSTRING(l.stamp, 1, 7) AS action_month
,SUBSTRING(CAST(l.stamp::date - interval '1 month' AS text), 1, 7) AS action_month_priv
FROM mst_users AS u JOIN action_log AS l ON u.user_id = l.user_id
)
,monthly_user_with_type AS (
-- 월별 사용자 분류 테이블
SELECT
action_month
,user_id
,CASE WHEN register_month = action_month THEN 'new_user' -- 등록 월과 액션월이 일치하면 신규 사용잠
WHEN action_month_priv = LAG(action_month) -- 이전 월에 액션이 있다면 리피트 사용자
OVER(PARTITION BY user_id ORDER BY action_month) THEN 'repeat_user'
ELSE 'come_back_user' END AS c
,action_month_priv
FROM monthly_user_action
)
SELECT
action_month
-- 특정 달의 MAU
,COUNT(user_id) AS mau
,COUNT(CASE WHEN c = 'new_user' THEN 1 END) AS new_users
,COUNT(CASE WHEN c = 'repeat_user' THEN 1 END) AS repeat_users
,COUNT(CASE WHEN c = 'come_back_user' THEN 1 END) AS come_back_users
FROM monthly_user_with_type
GROUP BY action_month
ORDER BY action_month
'SQL' 카테고리의 다른 글
리트코드 SQL 50 (0) | 2024.04.09 |
---|---|
LeetCode 문제풀이 (1795) (0) | 2022.07.04 |
LeetCode 문제풀이 (1393, 1407, 1484, 1581) (1) | 2022.07.01 |
LeetCode 문제풀이 (626, 1158) (0) | 2022.06.27 |
LeetCode 문제풀이 (262) (0) | 2022.06.26 |