Skip to content

Aggregation and Window Drills

Focus on whether the query collapses rows or annotates rows.

1. Compute paid revenue and order count by customer.
SELECT
	customer_id,
	COUNT(*) AS paid_orders,
	SUM(total_amount) AS paid_revenue
FROM orders
WHERE status = 'paid'
GROUP BY customer_id;

Output grain is one row per customer.

2. Compute each order's percent of customer lifetime paid revenue.
SELECT
	order_id,
	customer_id,
	total_amount,
	total_amount / NULLIF(SUM(total_amount) OVER (PARTITION BY customer_id), 0) AS pct_customer_revenue
FROM orders
WHERE status = 'paid';

Window aggregation preserves order grain.

3. Top 5 customers by revenue per country.
WITH revenue AS (
	SELECT
		c.country,
		o.customer_id,
		SUM(o.total_amount) AS paid_revenue
	FROM orders o
	JOIN customers c
		ON c.customer_id = o.customer_id
	WHERE o.status = 'paid'
	GROUP BY c.country, o.customer_id
),
ranked AS (
	SELECT
		*,
		ROW_NUMBER() OVER (
			PARTITION BY country
			ORDER BY paid_revenue DESC, customer_id
		) AS rn
	FROM revenue
)
SELECT country, customer_id, paid_revenue
FROM ranked
WHERE rn <= 5;

Use DENSE_RANK instead if ties should all be included.

4. Running paid revenue by customer.
SELECT
	customer_id,
	order_id,
	order_ts,
	total_amount,
	SUM(total_amount) OVER (
		PARTITION BY customer_id
		ORDER BY order_ts, order_id
		ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
	) AS running_revenue
FROM orders
WHERE status = 'paid';

Explicit ROWS frame avoids peer surprises.

5. Find the second paid order per customer.
WITH ranked AS (
	SELECT
		o.*,
		ROW_NUMBER() OVER (
			PARTITION BY customer_id
			ORDER BY order_ts, order_id
		) AS rn
	FROM orders o
	WHERE status = 'paid'
)
SELECT customer_id, order_id, order_ts
FROM ranked
WHERE rn = 2;
6. Compute average days between paid orders per customer.

First compute previous timestamp; use engine-specific timestamp diff in the final projection.

WITH ordered AS (
	SELECT
		customer_id,
		order_id,
		order_ts,
		LAG(order_ts) OVER (
			PARTITION BY customer_id
			ORDER BY order_ts, order_id
		) AS prev_order_ts
	FROM orders
	WHERE status = 'paid'
)
SELECT customer_id, AVG(order_ts - prev_order_ts) AS avg_gap
FROM ordered
WHERE prev_order_ts IS NOT NULL
GROUP BY customer_id;

The interval subtraction syntax varies by engine.

7. Compute daily revenue and 28-day moving average.
WITH daily AS (
	SELECT order_date, SUM(total_amount) AS revenue
	FROM orders
	WHERE status = 'paid'
	GROUP BY order_date
)
SELECT
	order_date,
	revenue,
	AVG(revenue) OVER (
		ORDER BY order_date
		ROWS BETWEEN 27 PRECEDING AND CURRENT ROW
	) AS revenue_ma_28_rows
FROM daily;

This is 28 rows. Use a date spine for strict 28 calendar days.

8. Compute category revenue rank within each month.
WITH category_month AS (
	SELECT
		o.order_month,
		p.category,
		SUM(oi.quantity * oi.unit_price) AS revenue
	FROM orders o
	JOIN order_items oi
		ON oi.order_id = o.order_id
	JOIN products p
		ON p.product_id = oi.product_id
	WHERE o.status = 'paid'
	GROUP BY o.order_month, p.category
)
SELECT
	*,
	RANK() OVER (
		PARTITION BY order_month
		ORDER BY revenue DESC
	) AS revenue_rank
FROM category_month;
9. Compute the first and last paid order timestamps per customer.
SELECT
	customer_id,
	MIN(order_ts) AS first_paid_order_ts,
	MAX(order_ts) AS last_paid_order_ts
FROM orders
WHERE status = 'paid'
GROUP BY customer_id;

If you need the full first/last order rows, use window ranking.

10. Find customers whose paid revenue is above their country average.
WITH customer_revenue AS (
	SELECT
		c.country,
		o.customer_id,
		SUM(o.total_amount) AS revenue
	FROM orders o
	JOIN customers c
		ON c.customer_id = o.customer_id
	WHERE o.status = 'paid'
	GROUP BY c.country, o.customer_id
),
with_avg AS (
	SELECT
		*,
		AVG(revenue) OVER (PARTITION BY country) AS country_avg_revenue
	FROM customer_revenue
)
SELECT country, customer_id, revenue, country_avg_revenue
FROM with_avg
WHERE revenue > country_avg_revenue;
11. Compute order count distribution by customer.
WITH counts AS (
	SELECT customer_id, COUNT(*) AS order_count
	FROM orders
	GROUP BY customer_id
)
SELECT order_count, COUNT(*) AS customers
FROM counts
GROUP BY order_count
ORDER BY order_count;

This produces a histogram of customer order counts.

12. Bucket customers into revenue deciles.
WITH revenue AS (
	SELECT customer_id, SUM(total_amount) AS revenue
	FROM orders
	WHERE status = 'paid'
	GROUP BY customer_id
)
SELECT
	customer_id,
	revenue,
	NTILE(10) OVER (ORDER BY revenue) AS revenue_decile
FROM revenue;

NTILE creates roughly equal row-count buckets.

13. Compute cumulative share of revenue by customer.
WITH revenue AS (
	SELECT customer_id, SUM(total_amount) AS revenue
	FROM orders
	WHERE status = 'paid'
	GROUP BY customer_id
),
ranked AS (
	SELECT
		customer_id,
		revenue,
		SUM(revenue) OVER (
			ORDER BY revenue DESC, customer_id
			ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
		) AS cumulative_revenue,
		SUM(revenue) OVER () AS total_revenue
	FROM revenue
)
SELECT
	customer_id,
	revenue,
	cumulative_revenue / NULLIF(total_revenue, 0) AS cumulative_revenue_share
FROM ranked;
14. Find the modal acquisition channel by country.
WITH counts AS (
	SELECT country, acquisition_channel, COUNT(*) AS customers
	FROM customers
	GROUP BY country, acquisition_channel
),
ranked AS (
	SELECT
		*,
		ROW_NUMBER() OVER (
			PARTITION BY country
			ORDER BY customers DESC, acquisition_channel
		) AS rn
	FROM counts
)
SELECT country, acquisition_channel, customers
FROM ranked
WHERE rn = 1;
15. Compute daily active users and previous day's active users.
WITH daily AS (
	SELECT event_date, COUNT(DISTINCT user_id) AS active_users
	FROM events
	GROUP BY event_date
)
SELECT
	event_date,
	active_users,
	LAG(active_users) OVER (ORDER BY event_date) AS previous_active_users
FROM daily;
16. Find customers whose order amount increased three orders in a row.
WITH ordered AS (
	SELECT
		customer_id,
		order_id,
		order_ts,
		total_amount,
		LAG(total_amount, 1) OVER (
			PARTITION BY customer_id
			ORDER BY order_ts, order_id
		) AS prev_amount,
		LAG(total_amount, 2) OVER (
			PARTITION BY customer_id
			ORDER BY order_ts, order_id
		) AS prev_prev_amount
	FROM orders
	WHERE status = 'paid'
)
SELECT DISTINCT customer_id
FROM ordered
WHERE prev_prev_amount < prev_amount
	AND prev_amount < total_amount;

Window offset syntax is widely supported, but exact function signatures can vary.

17. Count orders by customer revenue tier.
WITH customer_revenue AS (
	SELECT customer_id, SUM(total_amount) AS revenue
	FROM orders
	WHERE status = 'paid'
	GROUP BY customer_id
),
tiered AS (
	SELECT
		customer_id,
		CASE
			WHEN revenue >= 10000 THEN 'high'
			WHEN revenue >= 1000 THEN 'medium'
			ELSE 'low'
		END AS revenue_tier
	FROM customer_revenue
)
SELECT revenue_tier, COUNT(*) AS customers
FROM tiered
GROUP BY revenue_tier;
18. Compute average order value by first-order month.
WITH first_order AS (
	SELECT customer_id, MIN(order_month) AS cohort_month
	FROM orders
	WHERE status = 'paid'
	GROUP BY customer_id
)
SELECT
	f.cohort_month,
	AVG(o.total_amount) AS avg_order_value
FROM first_order f
JOIN orders o
	ON o.customer_id = f.customer_id
WHERE o.status = 'paid'
GROUP BY f.cohort_month;

This groups all future orders by first paid order month.

19. Find the highest revenue order per day, including ties.
WITH ranked AS (
	SELECT
		order_id,
		order_date,
		total_amount,
		RANK() OVER (
			PARTITION BY order_date
			ORDER BY total_amount DESC
		) AS rnk
	FROM orders
	WHERE status = 'paid'
)
SELECT order_date, order_id, total_amount
FROM ranked
WHERE rnk = 1;

RANK includes ties.

20. Compute product category revenue share within each month.
WITH category_month AS (
	SELECT
		o.order_month,
		p.category,
		SUM(oi.quantity * oi.unit_price) AS revenue
	FROM orders o
	JOIN order_items oi
		ON oi.order_id = o.order_id
	JOIN products p
		ON p.product_id = oi.product_id
	WHERE o.status = 'paid'
	GROUP BY o.order_month, p.category
)
SELECT
	order_month,
	category,
	revenue,
	revenue / NULLIF(SUM(revenue) OVER (PARTITION BY order_month), 0) AS month_revenue_share
FROM category_month;
21. Find customers whose first paid order amount is greater than their average paid order amount.
WITH annotated AS (
	SELECT
		o.*,
		ROW_NUMBER() OVER (
			PARTITION BY customer_id
			ORDER BY order_ts, order_id
		) AS rn,
		AVG(total_amount) OVER (PARTITION BY customer_id) AS avg_amount
	FROM orders o
	WHERE status = 'paid'
)
SELECT customer_id, order_id, total_amount, avg_amount
FROM annotated
WHERE rn = 1
	AND total_amount > avg_amount;
22. Compute month-to-date revenue by day.
WITH daily AS (
	SELECT order_month, order_date, SUM(total_amount) AS revenue
	FROM orders
	WHERE status = 'paid'
	GROUP BY order_month, order_date
)
SELECT
	order_date,
	revenue,
	SUM(revenue) OVER (
		PARTITION BY order_month
		ORDER BY order_date
		ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
	) AS month_to_date_revenue
FROM daily;
23. Count orders above the customer's own average order value.
WITH annotated AS (
	SELECT
		o.*,
		AVG(total_amount) OVER (PARTITION BY customer_id) AS avg_customer_amount
	FROM orders o
	WHERE status = 'paid'
)
SELECT
	customer_id,
	SUM(CASE WHEN total_amount > avg_customer_amount THEN 1 ELSE 0 END) AS orders_above_customer_avg
FROM annotated
GROUP BY customer_id;
24. Compute exact median from ordered row numbers.
WITH ordered AS (
	SELECT
		total_amount,
		ROW_NUMBER() OVER (ORDER BY total_amount) AS rn,
		COUNT(*) OVER () AS cnt
	FROM orders
	WHERE status = 'paid'
)
SELECT AVG(total_amount) AS median_paid_order_value
FROM ordered
WHERE rn IN ((cnt + 1) / 2, (cnt + 2) / 2);

Integer division behavior varies by engine; adjust syntax as needed.

25. Explain why ORDER BY inside a CTE does not guarantee final output order.

Relations are unordered unless the outermost query orders them. Some engines may preserve order accidentally in simple plans, but it is not a portable contract. Put ORDER BY in the final query when output order matters.