Intermediate Problems
Focus: CTE decomposition, windows, funnels, retention, deduplication, and temporal joins.
1. Compute each customer's share of total paid revenue.
WITH customer_revenue AS (
SELECT customer_id, SUM(total_amount) AS revenue
FROM orders
WHERE status = 'paid'
GROUP BY customer_id
)
SELECT
customer_id,
revenue,
revenue / NULLIF(SUM(revenue) OVER (), 0) AS revenue_share
FROM customer_revenue;
NULLIF avoids divide-by-zero.
2. Find customers whose latest order is cancelled.
WITH ranked AS (
SELECT
o.*,
ROW_NUMBER() OVER (
PARTITION BY customer_id
ORDER BY order_ts DESC, order_id DESC
) AS rn
FROM orders o
)
SELECT customer_id, order_id, order_ts
FROM ranked
WHERE rn = 1
AND status = 'cancelled';
3. Compute month-over-month paid revenue growth.
Assume order_month is available or derived upstream.
WITH monthly AS (
SELECT
order_month,
SUM(total_amount) AS revenue
FROM orders
WHERE status = 'paid'
GROUP BY order_month
)
SELECT
order_month,
revenue,
LAG(revenue) OVER (ORDER BY order_month) AS prev_revenue,
(revenue - LAG(revenue) OVER (ORDER BY order_month))
/ NULLIF(LAG(revenue) OVER (ORDER BY order_month), 0) AS growth_rate
FROM monthly;
Some engines require another CTE to avoid repeating LAG.
4. Deduplicate events by event_id, keeping the latest ingest.
WITH ranked AS (
SELECT
e.*,
ROW_NUMBER() OVER (
PARTITION BY event_id
ORDER BY ingest_ts DESC
) AS rn
FROM raw_events e
)
SELECT *
FROM ranked
WHERE rn = 1;
If ingest_ts ties, add a deterministic secondary order.
5. Compute product revenue by category without double-counting order totals.
Use item-level revenue, not order-level total.
SELECT
p.category,
SUM(oi.quantity * oi.unit_price) AS item_revenue
FROM order_items oi
JOIN products p
ON p.product_id = oi.product_id
JOIN orders o
ON o.order_id = oi.order_id
WHERE o.status = 'paid'
GROUP BY p.category;
6. Find users who viewed a product before purchasing.
WITH user_steps AS (
SELECT
user_id,
MIN(CASE WHEN event_name = 'view_product' THEN event_ts END) AS first_view_ts,
MIN(CASE WHEN event_name = 'purchase' THEN event_ts END) AS first_purchase_ts
FROM events
GROUP BY user_id
)
SELECT user_id
FROM user_steps
WHERE first_view_ts IS NOT NULL
AND first_purchase_ts > first_view_ts;
This answers first-view before first-purchase. If the question asks “any view before any purchase”, this works if first purchase after first view. More complex path logic may be required for repeated cycles.
7. Compute 7-day active users by day.
Use a date spine and a distinct user-day table.
WITH active_days AS (
SELECT DISTINCT user_id, event_date
FROM events
),
rolling AS (
SELECT
d.calendar_date,
a.user_id
FROM date_spine d
JOIN active_days a
ON a.event_date <= d.calendar_date
AND a.event_date > d.calendar_date - INTERVAL '7' DAY
)
SELECT calendar_date, COUNT(DISTINCT user_id) AS wau
FROM rolling
GROUP BY calendar_date;
For large data, this can be expensive. Production systems often pre-aggregate or use specialized sketches.
8. Compute first-touch acquisition channel revenue.
WITH first_touch AS (
SELECT
customer_id,
acquisition_channel
FROM customers
),
customer_revenue AS (
SELECT customer_id, SUM(total_amount) AS revenue
FROM orders
WHERE status = 'paid'
GROUP BY customer_id
)
SELECT
f.acquisition_channel,
SUM(COALESCE(r.revenue, 0)) AS revenue
FROM first_touch f
LEFT JOIN customer_revenue r
ON r.customer_id = f.customer_id
GROUP BY f.acquisition_channel;
This assumes customers.acquisition_channel is first-touch. If channel can change, use an attribution table with explicit rules.
9. Find products whose revenue rank improved from last month to this month.
WITH product_month_revenue AS (
SELECT
order_month,
oi.product_id,
SUM(oi.quantity * oi.unit_price) AS revenue
FROM orders o
JOIN order_items oi
ON oi.order_id = o.order_id
WHERE o.status = 'paid'
GROUP BY order_month, oi.product_id
),
ranked AS (
SELECT
*,
RANK() OVER (
PARTITION BY order_month
ORDER BY revenue DESC
) AS revenue_rank
FROM product_month_revenue
),
with_previous AS (
SELECT
product_id,
order_month,
revenue_rank,
LAG(revenue_rank) OVER (
PARTITION BY product_id
ORDER BY order_month
) AS prev_rank
FROM ranked
)
SELECT product_id, order_month, prev_rank, revenue_rank
FROM with_previous
WHERE prev_rank IS NOT NULL
AND revenue_rank < prev_rank;
Lower rank number is better.
10. For each user, compute time between consecutive sessions.
SELECT
user_id,
session_id,
started_at,
LAG(ended_at) OVER (
PARTITION BY user_id
ORDER BY started_at, session_id
) AS previous_session_ended_at
FROM sessions;
Then use engine-specific timestamp difference syntax.
11. Find customers who ordered in January but not February.
WITH jan AS (
SELECT DISTINCT customer_id
FROM orders
WHERE order_ts >= TIMESTAMP '2026-01-01 00:00:00'
AND order_ts < TIMESTAMP '2026-02-01 00:00:00'
),
feb AS (
SELECT DISTINCT customer_id
FROM orders
WHERE order_ts >= TIMESTAMP '2026-02-01 00:00:00'
AND order_ts < TIMESTAMP '2026-03-01 00:00:00'
)
SELECT j.customer_id
FROM jan j
WHERE NOT EXISTS (
SELECT 1
FROM feb f
WHERE f.customer_id = j.customer_id
);
12. Compute conversion rate by experiment variant.
WITH assigned AS (
SELECT user_id, variant, assigned_ts
FROM experiments
WHERE experiment_name = 'checkout_test'
),
converted AS (
SELECT DISTINCT a.user_id
FROM assigned a
JOIN events e
ON e.user_id = a.user_id
AND e.event_name = 'purchase'
AND e.event_ts >= a.assigned_ts
AND e.event_ts < a.assigned_ts + INTERVAL '7' DAY
)
SELECT
a.variant,
COUNT(*) AS assigned_users,
COUNT(c.user_id) AS converted_users,
COUNT(c.user_id) * 1.0 / NULLIF(COUNT(*), 0) AS conversion_rate
FROM assigned a
LEFT JOIN converted c
ON c.user_id = a.user_id
GROUP BY a.variant;
This preserves assigned users with no conversion.
13. Find each customer's largest order and include ties.
WITH ranked AS (
SELECT
o.*,
RANK() OVER (
PARTITION BY customer_id
ORDER BY total_amount DESC
) AS amount_rank
FROM orders o
WHERE status = 'paid'
)
SELECT customer_id, order_id, total_amount
FROM ranked
WHERE amount_rank = 1;
Use ROW_NUMBER if only one order per customer should be returned.
14. Compute cumulative paid revenue by day.
WITH daily AS (
SELECT order_date, SUM(total_amount) AS revenue
FROM orders
WHERE status = 'paid'
GROUP BY order_date
)
SELECT
order_date,
revenue,
SUM(revenue) OVER (
ORDER BY order_date
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
) AS cumulative_revenue
FROM daily;
15. Find events that occur before customer signup.
SELECT
e.event_id,
e.user_id,
e.event_ts,
c.signup_ts
FROM events e
JOIN customers c
ON c.customer_id = e.user_id
WHERE e.event_ts < c.signup_ts;
This is a data quality check. The ID mapping may differ in real systems.