SQL 窗口函数
窗口函数是 SQL 中强大的数据分析工具,可以在不分组的情况下进行聚合计算。
基本语法
sql
SELECT
column1,
column2,
FUNCTION(column) OVER (
PARTITION BY partition_column
ORDER BY sort_column
ROWS/RANGE BETWEEN start AND end
) AS window_result
FROM table_name;常用窗口函数
聚合函数
sql
-- SUM OVER
SELECT
id,
department,
salary,
SUM(salary) OVER (PARTITION BY department) AS total_salary,
AVG(salary) OVER (PARTITION BY department) AS avg_salary,
MAX(salary) OVER (PARTITION BY department) AS max_salary,
MIN(salary) OVER (PARTITION BY department) AS min_salary,
COUNT(*) OVER (PARTITION BY department) AS employee_count
FROM employees;排名函数
sql
-- ROW_NUMBER
SELECT
id,
department,
salary,
ROW_NUMBER() OVER (PARTITION BY department ORDER BY salary DESC) AS row_num,
RANK() OVER (PARTITION BY department ORDER BY salary DESC) AS rank,
DENSE_RANK() OVER (PARTITION BY department ORDER BY salary DESC) AS dense_rank,
NTILE(4) OVER (PARTITION BY department ORDER BY salary DESC) AS quartile
FROM employees;偏移函数
sql
-- LAG 和 LEAD
SELECT
id,
date,
sales,
LAG(sales) OVER (ORDER BY date) AS prev_day_sales,
LAG(sales, 2) OVER (ORDER BY date) AS two_days_ago_sales,
LEAD(sales) OVER (ORDER BY date) AS next_day_sales,
LEAD(sales, 2) OVER (ORDER BY date) AS two_days_later_sales
FROM daily_sales;累计函数
sql
-- 累计求和
SELECT
id,
date,
sales,
SUM(sales) OVER (ORDER BY date) AS cumulative_sales,
SUM(sales) OVER (PARTITION BY year ORDER BY date) AS yearly_cumulative_sales
FROM monthly_sales;
-- 移动平均
SELECT
id,
date,
sales,
AVG(sales) OVER (ORDER BY date ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS moving_avg_3days
FROM daily_sales;窗口框架
ROWS 框架
sql
-- 前一行到当前行
SELECT
id,
sales,
SUM(sales) OVER (ORDER BY id ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) AS two_row_sum
FROM orders;
-- 前两行到后一行
SELECT
id,
sales,
SUM(sales) OVER (ORDER BY id ROWS BETWEEN 2 PRECEDING AND 1 FOLLOWING) AS four_row_sum
FROM orders;RANGE 框架
sql
-- 相同值的范围
SELECT
id,
score,
COUNT(*) OVER (ORDER BY score RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS cumulative_count
FROM students;实际应用
计算同比增长率
sql
SELECT
date,
sales,
LAG(sales, 12) OVER (ORDER BY date) AS sales_last_year,
(sales - LAG(sales, 12) OVER (ORDER BY date)) / LAG(sales, 12) OVER (ORDER BY date) AS yoy_growth
FROM monthly_sales;计算累计占比
sql
SELECT
id,
department,
salary,
SUM(salary) OVER (PARTITION BY department ORDER BY salary DESC) AS cumulative_salary,
SUM(salary) OVER (PARTITION BY department) AS total_salary,
SUM(salary) OVER (PARTITION BY department ORDER BY salary DESC) / SUM(salary) OVER (PARTITION BY department) AS cumulative_percentage
FROM employees;Top N 查询
sql
-- 每个部门工资最高的前3名员工
WITH ranked_employees AS (
SELECT
id,
department,
salary,
ROW_NUMBER() OVER (PARTITION BY department ORDER BY salary DESC) AS rn
FROM employees
)
SELECT * FROM ranked_employees WHERE rn <= 3;注意事项
- 窗口函数不会减少行数: 与 GROUP BY 不同
- PARTITION BY 是可选的: 不指定则对整个结果集计算
- ORDER BY 影响窗口框架: 未指定时可能影响结果
- 性能考虑: 大数据集上使用窗口函数需要注意性能