SQL 数据分析
SQL 是数据分析中最常用的查询语言,用于从数据库中提取和分析数据。
查询优化
索引优化
sql
-- 创建单列索引
CREATE INDEX idx_users_email ON users(email);
-- 创建复合索引
CREATE INDEX idx_orders_customer_date ON orders(customer_id, order_date);
-- 创建唯一索引
CREATE UNIQUE INDEX idx_users_username ON users(username);
-- 删除索引
DROP INDEX idx_users_email;查询重写
sql
-- 避免 SELECT *
SELECT name, email FROM users WHERE id = 1;
-- 使用 JOIN 代替子查询
SELECT u.name, o.order_id
FROM users u
JOIN orders o ON u.id = o.customer_id
WHERE u.country = 'China';
-- 使用 EXISTS 代替 IN
SELECT name
FROM users u
WHERE EXISTS (
SELECT 1 FROM orders o
WHERE o.customer_id = u.id
);窗口函数
排名函数
sql
-- ROW_NUMBER: 连续排名
SELECT
name,
score,
ROW_NUMBER() OVER (ORDER BY score DESC) as row_num
FROM students;
-- RANK: 跳跃排名
SELECT
name,
score,
RANK() OVER (ORDER BY score DESC) as rank
FROM students;
-- DENSE_RANK: 密集排名
SELECT
name,
score,
DENSE_RANK() OVER (ORDER BY score DESC) as dense_rank
FROM students;聚合窗口函数
sql
-- 计算累计和
SELECT
date,
sales,
SUM(sales) OVER (ORDER BY date) as cumulative_sales
FROM daily_sales;
-- 移动平均
SELECT
date,
sales,
AVG(sales) OVER (
ORDER BY date
ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
) as moving_avg
FROM daily_sales;
-- 分组统计
SELECT
department,
name,
salary,
AVG(salary) OVER (PARTITION BY department) as dept_avg
FROM employees;CTE 表达式
基本 CTE
sql
WITH monthly_sales AS (
SELECT
DATE_TRUNC('month', order_date) as month,
SUM(amount) as total_sales
FROM orders
GROUP BY DATE_TRUNC('month', order_date)
)
SELECT
month,
total_sales,
LAG(total_sales) OVER (ORDER BY month) as prev_month_sales
FROM monthly_sales;递归 CTE
sql
-- 查询组织层级
WITH RECURSIVE org_hierarchy AS (
-- 基础情况:顶级员工
SELECT
id,
name,
manager_id,
1 as level
FROM employees
WHERE manager_id IS NULL
UNION ALL
-- 递归情况:下级员工
SELECT
e.id,
e.name,
e.manager_id,
oh.level + 1 as level
FROM employees e
JOIN org_hierarchy oh ON e.manager_id = oh.id
)
SELECT * FROM org_hierarchy;数据透视
PIVOT 操作
sql
-- PostgreSQL
SELECT *
FROM (
SELECT department, category, amount
FROM expenses
) AS source
PIVOT (
SUM(amount)
FOR category IN ('salary', 'office', 'travel')
) AS pivot_table;
-- MySQL 使用 CASE
SELECT
department,
SUM(CASE WHEN category = 'salary' THEN amount ELSE 0 END) as salary,
SUM(CASE WHEN category = 'office' THEN amount ELSE 0 END) as office,
SUM(CASE WHEN category = 'travel' THEN amount ELSE 0 END) as travel
FROM expenses
GROUP BY department;性能分析
EXPLAIN 分析
sql
EXPLAIN ANALYZE
SELECT u.name, COUNT(o.id) as order_count
FROM users u
LEFT JOIN orders o ON u.id = o.customer_id
WHERE u.created_at > '2024-01-01'
GROUP BY u.id, u.name
ORDER BY order_count DESC
LIMIT 10;慢查询日志
sql
-- MySQL 开启慢查询日志
SET GLOBAL slow_query_log = 'ON';
SET GLOBAL long_query_time = 2; -- 超过 2 秒的查询记录
SET GLOBAL slow_query_log_file = '/var/log/mysql/slow.log';