Aggregation and Window Drills
Focus on whether the query collapses rows or annotates rows.
1. Compute paid revenue and order count by customer.
SELECT
customer_id,
COUNT(*) AS paid_orders,
SUM(total_amount) AS paid_revenue
FROM orders
WHERE status = 'paid'
GROUP BY customer_id;
Output grain is one row per customer.
2. Compute each order's percent of customer lifetime paid revenue.
SELECT
order_id,
customer_id,
total_amount,
total_amount / NULLIF(SUM(total_amount) OVER (PARTITION BY customer_id), 0) AS pct_customer_revenue
FROM orders
WHERE status = 'paid';
Window aggregation preserves order grain.
3. Top 5 customers by revenue per country.
WITH revenue AS (
SELECT
c.country,
o.customer_id,
SUM(o.total_amount) AS paid_revenue
FROM orders o
JOIN customers c
ON c.customer_id = o.customer_id
WHERE o.status = 'paid'
GROUP BY c.country, o.customer_id
),
ranked AS (
SELECT
*,
ROW_NUMBER() OVER (
PARTITION BY country
ORDER BY paid_revenue DESC, customer_id
) AS rn
FROM revenue
)
SELECT country, customer_id, paid_revenue
FROM ranked
WHERE rn <= 5;
Use DENSE_RANK instead if ties should all be included.
4. Running paid revenue by customer.
SELECT
customer_id,
order_id,
order_ts,
total_amount,
SUM(total_amount) OVER (
PARTITION BY customer_id
ORDER BY order_ts, order_id
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS running_revenue
FROM orders
WHERE status = 'paid';
Explicit ROWS frame avoids peer surprises.
5. Find the second paid order per customer.
WITH ranked AS (
SELECT
o.*,
ROW_NUMBER() OVER (
PARTITION BY customer_id
ORDER BY order_ts, order_id
) AS rn
FROM orders o
WHERE status = 'paid'
)
SELECT customer_id, order_id, order_ts
FROM ranked
WHERE rn = 2;
6. Compute average days between paid orders per customer.
First compute previous timestamp; use engine-specific timestamp diff in the final projection.
WITH ordered AS (
SELECT
customer_id,
order_id,
order_ts,
LAG(order_ts) OVER (
PARTITION BY customer_id
ORDER BY order_ts, order_id
) AS prev_order_ts
FROM orders
WHERE status = 'paid'
)
SELECT customer_id, AVG(order_ts - prev_order_ts) AS avg_gap
FROM ordered
WHERE prev_order_ts IS NOT NULL
GROUP BY customer_id;
The interval subtraction syntax varies by engine.
7. Compute daily revenue and 28-day moving average.
WITH daily AS (
SELECT order_date, SUM(total_amount) AS revenue
FROM orders
WHERE status = 'paid'
GROUP BY order_date
)
SELECT
order_date,
revenue,
AVG(revenue) OVER (
ORDER BY order_date
ROWS BETWEEN 27 PRECEDING AND CURRENT ROW
) AS revenue_ma_28_rows
FROM daily;
This is 28 rows. Use a date spine for strict 28 calendar days.
8. Compute category revenue rank within each month.
WITH category_month AS (
SELECT
o.order_month,
p.category,
SUM(oi.quantity * oi.unit_price) AS revenue
FROM orders o
JOIN order_items oi
ON oi.order_id = o.order_id
JOIN products p
ON p.product_id = oi.product_id
WHERE o.status = 'paid'
GROUP BY o.order_month, p.category
)
SELECT
*,
RANK() OVER (
PARTITION BY order_month
ORDER BY revenue DESC
) AS revenue_rank
FROM category_month;
9. Compute the first and last paid order timestamps per customer.
SELECT
customer_id,
MIN(order_ts) AS first_paid_order_ts,
MAX(order_ts) AS last_paid_order_ts
FROM orders
WHERE status = 'paid'
GROUP BY customer_id;
If you need the full first/last order rows, use window ranking.
10. Find customers whose paid revenue is above their country average.
WITH customer_revenue AS (
SELECT
c.country,
o.customer_id,
SUM(o.total_amount) AS revenue
FROM orders o
JOIN customers c
ON c.customer_id = o.customer_id
WHERE o.status = 'paid'
GROUP BY c.country, o.customer_id
),
with_avg AS (
SELECT
*,
AVG(revenue) OVER (PARTITION BY country) AS country_avg_revenue
FROM customer_revenue
)
SELECT country, customer_id, revenue, country_avg_revenue
FROM with_avg
WHERE revenue > country_avg_revenue;
11. Compute order count distribution by customer.
WITH counts AS (
SELECT customer_id, COUNT(*) AS order_count
FROM orders
GROUP BY customer_id
)
SELECT order_count, COUNT(*) AS customers
FROM counts
GROUP BY order_count
ORDER BY order_count;
This produces a histogram of customer order counts.
12. Bucket customers into revenue deciles.
WITH revenue AS (
SELECT customer_id, SUM(total_amount) AS revenue
FROM orders
WHERE status = 'paid'
GROUP BY customer_id
)
SELECT
customer_id,
revenue,
NTILE(10) OVER (ORDER BY revenue) AS revenue_decile
FROM revenue;
NTILE creates roughly equal row-count buckets.
13. Compute cumulative share of revenue by customer.
WITH revenue AS (
SELECT customer_id, SUM(total_amount) AS revenue
FROM orders
WHERE status = 'paid'
GROUP BY customer_id
),
ranked AS (
SELECT
customer_id,
revenue,
SUM(revenue) OVER (
ORDER BY revenue DESC, customer_id
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS cumulative_revenue,
SUM(revenue) OVER () AS total_revenue
FROM revenue
)
SELECT
customer_id,
revenue,
cumulative_revenue / NULLIF(total_revenue, 0) AS cumulative_revenue_share
FROM ranked;
14. Find the modal acquisition channel by country.
WITH counts AS (
SELECT country, acquisition_channel, COUNT(*) AS customers
FROM customers
GROUP BY country, acquisition_channel
),
ranked AS (
SELECT
*,
ROW_NUMBER() OVER (
PARTITION BY country
ORDER BY customers DESC, acquisition_channel
) AS rn
FROM counts
)
SELECT country, acquisition_channel, customers
FROM ranked
WHERE rn = 1;
15. Compute daily active users and previous day's active users.
WITH daily AS (
SELECT event_date, COUNT(DISTINCT user_id) AS active_users
FROM events
GROUP BY event_date
)
SELECT
event_date,
active_users,
LAG(active_users) OVER (ORDER BY event_date) AS previous_active_users
FROM daily;
16. Find customers whose order amount increased three orders in a row.
WITH ordered AS (
SELECT
customer_id,
order_id,
order_ts,
total_amount,
LAG(total_amount, 1) OVER (
PARTITION BY customer_id
ORDER BY order_ts, order_id
) AS prev_amount,
LAG(total_amount, 2) OVER (
PARTITION BY customer_id
ORDER BY order_ts, order_id
) AS prev_prev_amount
FROM orders
WHERE status = 'paid'
)
SELECT DISTINCT customer_id
FROM ordered
WHERE prev_prev_amount < prev_amount
AND prev_amount < total_amount;
Window offset syntax is widely supported, but exact function signatures can vary.
17. Count orders by customer revenue tier.
WITH customer_revenue AS (
SELECT customer_id, SUM(total_amount) AS revenue
FROM orders
WHERE status = 'paid'
GROUP BY customer_id
),
tiered AS (
SELECT
customer_id,
CASE
WHEN revenue >= 10000 THEN 'high'
WHEN revenue >= 1000 THEN 'medium'
ELSE 'low'
END AS revenue_tier
FROM customer_revenue
)
SELECT revenue_tier, COUNT(*) AS customers
FROM tiered
GROUP BY revenue_tier;
18. Compute average order value by first-order month.
WITH first_order AS (
SELECT customer_id, MIN(order_month) AS cohort_month
FROM orders
WHERE status = 'paid'
GROUP BY customer_id
)
SELECT
f.cohort_month,
AVG(o.total_amount) AS avg_order_value
FROM first_order f
JOIN orders o
ON o.customer_id = f.customer_id
WHERE o.status = 'paid'
GROUP BY f.cohort_month;
This groups all future orders by first paid order month.
19. Find the highest revenue order per day, including ties.
WITH ranked AS (
SELECT
order_id,
order_date,
total_amount,
RANK() OVER (
PARTITION BY order_date
ORDER BY total_amount DESC
) AS rnk
FROM orders
WHERE status = 'paid'
)
SELECT order_date, order_id, total_amount
FROM ranked
WHERE rnk = 1;
RANK includes ties.
20. Compute product category revenue share within each month.
WITH category_month AS (
SELECT
o.order_month,
p.category,
SUM(oi.quantity * oi.unit_price) AS revenue
FROM orders o
JOIN order_items oi
ON oi.order_id = o.order_id
JOIN products p
ON p.product_id = oi.product_id
WHERE o.status = 'paid'
GROUP BY o.order_month, p.category
)
SELECT
order_month,
category,
revenue,
revenue / NULLIF(SUM(revenue) OVER (PARTITION BY order_month), 0) AS month_revenue_share
FROM category_month;
21. Find customers whose first paid order amount is greater than their average paid order amount.
WITH annotated AS (
SELECT
o.*,
ROW_NUMBER() OVER (
PARTITION BY customer_id
ORDER BY order_ts, order_id
) AS rn,
AVG(total_amount) OVER (PARTITION BY customer_id) AS avg_amount
FROM orders o
WHERE status = 'paid'
)
SELECT customer_id, order_id, total_amount, avg_amount
FROM annotated
WHERE rn = 1
AND total_amount > avg_amount;
22. Compute month-to-date revenue by day.
WITH daily AS (
SELECT order_month, order_date, SUM(total_amount) AS revenue
FROM orders
WHERE status = 'paid'
GROUP BY order_month, order_date
)
SELECT
order_date,
revenue,
SUM(revenue) OVER (
PARTITION BY order_month
ORDER BY order_date
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS month_to_date_revenue
FROM daily;
23. Count orders above the customer's own average order value.
WITH annotated AS (
SELECT
o.*,
AVG(total_amount) OVER (PARTITION BY customer_id) AS avg_customer_amount
FROM orders o
WHERE status = 'paid'
)
SELECT
customer_id,
SUM(CASE WHEN total_amount > avg_customer_amount THEN 1 ELSE 0 END) AS orders_above_customer_avg
FROM annotated
GROUP BY customer_id;
24. Compute exact median from ordered row numbers.
WITH ordered AS (
SELECT
total_amount,
ROW_NUMBER() OVER (ORDER BY total_amount) AS rn,
COUNT(*) OVER () AS cnt
FROM orders
WHERE status = 'paid'
)
SELECT AVG(total_amount) AS median_paid_order_value
FROM ordered
WHERE rn IN ((cnt + 1) / 2, (cnt + 2) / 2);
Integer division behavior varies by engine; adjust syntax as needed.
25. Explain why ORDER BY inside a CTE does not guarantee final output order.
Relations are unordered unless the outermost query orders them. Some engines may preserve order accidentally in simple plans, but it is not a portable contract. Put ORDER BY in the final query when output order matters.