Skip to content

Intermediate Problems

Focus: CTE decomposition, windows, funnels, retention, deduplication, and temporal joins.

1. Compute each customer's share of total paid revenue.
WITH customer_revenue AS (
	SELECT customer_id, SUM(total_amount) AS revenue
	FROM orders
	WHERE status = 'paid'
	GROUP BY customer_id
)
SELECT
	customer_id,
	revenue,
	revenue / NULLIF(SUM(revenue) OVER (), 0) AS revenue_share
FROM customer_revenue;

NULLIF avoids divide-by-zero.

2. Find customers whose latest order is cancelled.
WITH ranked AS (
	SELECT
		o.*,
		ROW_NUMBER() OVER (
			PARTITION BY customer_id
			ORDER BY order_ts DESC, order_id DESC
		) AS rn
	FROM orders o
)
SELECT customer_id, order_id, order_ts
FROM ranked
WHERE rn = 1
	AND status = 'cancelled';
3. Compute month-over-month paid revenue growth.

Assume order_month is available or derived upstream.

WITH monthly AS (
	SELECT
		order_month,
		SUM(total_amount) AS revenue
	FROM orders
	WHERE status = 'paid'
	GROUP BY order_month
)
SELECT
	order_month,
	revenue,
	LAG(revenue) OVER (ORDER BY order_month) AS prev_revenue,
	(revenue - LAG(revenue) OVER (ORDER BY order_month))
		/ NULLIF(LAG(revenue) OVER (ORDER BY order_month), 0) AS growth_rate
FROM monthly;

Some engines require another CTE to avoid repeating LAG.

4. Deduplicate events by event_id, keeping the latest ingest.
WITH ranked AS (
	SELECT
		e.*,
		ROW_NUMBER() OVER (
			PARTITION BY event_id
			ORDER BY ingest_ts DESC
		) AS rn
	FROM raw_events e
)
SELECT *
FROM ranked
WHERE rn = 1;

If ingest_ts ties, add a deterministic secondary order.

5. Compute product revenue by category without double-counting order totals.

Use item-level revenue, not order-level total.

SELECT
	p.category,
	SUM(oi.quantity * oi.unit_price) AS item_revenue
FROM order_items oi
JOIN products p
	ON p.product_id = oi.product_id
JOIN orders o
	ON o.order_id = oi.order_id
WHERE o.status = 'paid'
GROUP BY p.category;
6. Find users who viewed a product before purchasing.
WITH user_steps AS (
	SELECT
		user_id,
		MIN(CASE WHEN event_name = 'view_product' THEN event_ts END) AS first_view_ts,
		MIN(CASE WHEN event_name = 'purchase' THEN event_ts END) AS first_purchase_ts
	FROM events
	GROUP BY user_id
)
SELECT user_id
FROM user_steps
WHERE first_view_ts IS NOT NULL
	AND first_purchase_ts > first_view_ts;

This answers first-view before first-purchase. If the question asks “any view before any purchase”, this works if first purchase after first view. More complex path logic may be required for repeated cycles.

7. Compute 7-day active users by day.

Use a date spine and a distinct user-day table.

WITH active_days AS (
	SELECT DISTINCT user_id, event_date
	FROM events
),
rolling AS (
	SELECT
		d.calendar_date,
		a.user_id
	FROM date_spine d
	JOIN active_days a
		ON a.event_date <= d.calendar_date
		AND a.event_date > d.calendar_date - INTERVAL '7' DAY
)
SELECT calendar_date, COUNT(DISTINCT user_id) AS wau
FROM rolling
GROUP BY calendar_date;

For large data, this can be expensive. Production systems often pre-aggregate or use specialized sketches.

8. Compute first-touch acquisition channel revenue.
WITH first_touch AS (
	SELECT
		customer_id,
		acquisition_channel
	FROM customers
),
customer_revenue AS (
	SELECT customer_id, SUM(total_amount) AS revenue
	FROM orders
	WHERE status = 'paid'
	GROUP BY customer_id
)
SELECT
	f.acquisition_channel,
	SUM(COALESCE(r.revenue, 0)) AS revenue
FROM first_touch f
LEFT JOIN customer_revenue r
	ON r.customer_id = f.customer_id
GROUP BY f.acquisition_channel;

This assumes customers.acquisition_channel is first-touch. If channel can change, use an attribution table with explicit rules.

9. Find products whose revenue rank improved from last month to this month.
WITH product_month_revenue AS (
	SELECT
		order_month,
		oi.product_id,
		SUM(oi.quantity * oi.unit_price) AS revenue
	FROM orders o
	JOIN order_items oi
		ON oi.order_id = o.order_id
	WHERE o.status = 'paid'
	GROUP BY order_month, oi.product_id
),
ranked AS (
	SELECT
		*,
		RANK() OVER (
			PARTITION BY order_month
			ORDER BY revenue DESC
		) AS revenue_rank
	FROM product_month_revenue
),
with_previous AS (
	SELECT
		product_id,
		order_month,
		revenue_rank,
		LAG(revenue_rank) OVER (
			PARTITION BY product_id
			ORDER BY order_month
		) AS prev_rank
	FROM ranked
)
SELECT product_id, order_month, prev_rank, revenue_rank
FROM with_previous
WHERE prev_rank IS NOT NULL
	AND revenue_rank < prev_rank;

Lower rank number is better.

10. For each user, compute time between consecutive sessions.
SELECT
	user_id,
	session_id,
	started_at,
	LAG(ended_at) OVER (
		PARTITION BY user_id
		ORDER BY started_at, session_id
	) AS previous_session_ended_at
FROM sessions;

Then use engine-specific timestamp difference syntax.

11. Find customers who ordered in January but not February.
WITH jan AS (
	SELECT DISTINCT customer_id
	FROM orders
	WHERE order_ts >= TIMESTAMP '2026-01-01 00:00:00'
		AND order_ts < TIMESTAMP '2026-02-01 00:00:00'
),
feb AS (
	SELECT DISTINCT customer_id
	FROM orders
	WHERE order_ts >= TIMESTAMP '2026-02-01 00:00:00'
		AND order_ts < TIMESTAMP '2026-03-01 00:00:00'
)
SELECT j.customer_id
FROM jan j
WHERE NOT EXISTS (
	SELECT 1
	FROM feb f
	WHERE f.customer_id = j.customer_id
);
12. Compute conversion rate by experiment variant.
WITH assigned AS (
	SELECT user_id, variant, assigned_ts
	FROM experiments
	WHERE experiment_name = 'checkout_test'
),
converted AS (
	SELECT DISTINCT a.user_id
	FROM assigned a
	JOIN events e
		ON e.user_id = a.user_id
		AND e.event_name = 'purchase'
		AND e.event_ts >= a.assigned_ts
		AND e.event_ts < a.assigned_ts + INTERVAL '7' DAY
)
SELECT
	a.variant,
	COUNT(*) AS assigned_users,
	COUNT(c.user_id) AS converted_users,
	COUNT(c.user_id) * 1.0 / NULLIF(COUNT(*), 0) AS conversion_rate
FROM assigned a
LEFT JOIN converted c
	ON c.user_id = a.user_id
GROUP BY a.variant;

This preserves assigned users with no conversion.

13. Find each customer's largest order and include ties.
WITH ranked AS (
	SELECT
		o.*,
		RANK() OVER (
			PARTITION BY customer_id
			ORDER BY total_amount DESC
		) AS amount_rank
	FROM orders o
	WHERE status = 'paid'
)
SELECT customer_id, order_id, total_amount
FROM ranked
WHERE amount_rank = 1;

Use ROW_NUMBER if only one order per customer should be returned.

14. Compute cumulative paid revenue by day.
WITH daily AS (
	SELECT order_date, SUM(total_amount) AS revenue
	FROM orders
	WHERE status = 'paid'
	GROUP BY order_date
)
SELECT
	order_date,
	revenue,
	SUM(revenue) OVER (
		ORDER BY order_date
		ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
	) AS cumulative_revenue
FROM daily;
15. Find events that occur before customer signup.
SELECT
	e.event_id,
	e.user_id,
	e.event_ts,
	c.signup_ts
FROM events e
JOIN customers c
	ON c.customer_id = e.user_id
WHERE e.event_ts < c.signup_ts;

This is a data quality check. The ID mapping may differ in real systems.