Skip to content

Temporal, Funnel, Retention Drills

State the time boundary before opening the answer.

1. Compute new users by signup date.
SELECT signup_date, COUNT(*) AS new_users
FROM customers
GROUP BY signup_date;

If signup_date is derived from timestamp, use the engine’s date conversion consistently with timezone policy.

2. Compute daily active users from events.
SELECT event_date, COUNT(DISTINCT user_id) AS dau
FROM events
GROUP BY event_date;

Define bot filtering and identity resolution before productionizing.

3. Compute 7-day rolling active users.
WITH user_days AS (
	SELECT DISTINCT user_id, event_date
	FROM events
),
rolling AS (
	SELECT
		d.calendar_date,
		u.user_id
	FROM date_spine d
	JOIN user_days u
		ON u.event_date <= d.calendar_date
		AND u.event_date > d.calendar_date - INTERVAL '7' DAY
)
SELECT calendar_date, COUNT(DISTINCT user_id) AS wau
FROM rolling
GROUP BY calendar_date;

This can be expensive at scale; pre-aggregate user-day activity or use sketches if approximate counts are acceptable.

4. Compute signup-to-first-purchase conversion within 14 days.
WITH first_purchase AS (
	SELECT customer_id, MIN(order_ts) AS first_purchase_ts
	FROM orders
	WHERE status = 'paid'
	GROUP BY customer_id
)
SELECT
	COUNT(*) AS signups,
	SUM(
		CASE
			WHEN f.first_purchase_ts >= c.signup_ts
				AND f.first_purchase_ts < c.signup_ts + INTERVAL '14' DAY
			THEN 1 ELSE 0
		END
	) AS converted_14d
FROM customers c
LEFT JOIN first_purchase f
	ON f.customer_id = c.customer_id;
5. Build monthly retention from first activity month.
WITH first_seen AS (
	SELECT user_id, MIN(event_month) AS cohort_month
	FROM events
	GROUP BY user_id
),
activity AS (
	SELECT DISTINCT user_id, event_month
	FROM events
)
SELECT
	f.cohort_month,
	a.event_month,
	COUNT(*) AS retained_users
FROM first_seen f
JOIN activity a
	ON a.user_id = f.user_id
	AND a.event_month >= f.cohort_month
GROUP BY f.cohort_month, a.event_month;

Compute cohort age with engine-specific date diff.

6. Find users active yesterday but not today.
WITH yesterday AS (
	SELECT DISTINCT user_id
	FROM events
	WHERE event_date = DATE '2026-06-30'
),
today AS (
	SELECT DISTINCT user_id
	FROM events
	WHERE event_date = DATE '2026-07-01'
)
SELECT y.user_id
FROM yesterday y
WHERE NOT EXISTS (
	SELECT 1
	FROM today t
	WHERE t.user_id = y.user_id
);
7. Find reactivated users: inactive for 30 days, then active today.
WITH today AS (
	SELECT DISTINCT user_id
	FROM events
	WHERE event_date = DATE '2026-07-01'
)
SELECT t.user_id
FROM today t
WHERE NOT EXISTS (
	SELECT 1
	FROM events e
	WHERE e.user_id = t.user_id
		AND e.event_date >= DATE '2026-06-01'
		AND e.event_date < DATE '2026-07-01'
);

This uses a simple fixed 30-day lookback.

8. Compute first event after signup for every customer.
WITH ranked AS (
	SELECT
		c.customer_id,
		e.event_id,
		e.event_ts,
		ROW_NUMBER() OVER (
			PARTITION BY c.customer_id
			ORDER BY e.event_ts, e.event_id
		) AS rn
	FROM customers c
	JOIN events e
		ON e.user_id = c.customer_id
		AND e.event_ts >= c.signup_ts
)
SELECT customer_id, event_id, event_ts
FROM ranked
WHERE rn = 1;
9. Build a view-to-cart-to-purchase funnel with ordering.
WITH steps AS (
	SELECT
		user_id,
		MIN(CASE WHEN event_name = 'view_product' THEN event_ts END) AS view_ts,
		MIN(CASE WHEN event_name = 'add_to_cart' THEN event_ts END) AS cart_ts,
		MIN(CASE WHEN event_name = 'purchase' THEN event_ts END) AS purchase_ts
	FROM events
	GROUP BY user_id
)
SELECT
	COUNT(*) AS users,
	SUM(CASE WHEN view_ts IS NOT NULL THEN 1 ELSE 0 END) AS viewed,
	SUM(CASE WHEN cart_ts > view_ts THEN 1 ELSE 0 END) AS carted_after_view,
	SUM(CASE WHEN purchase_ts > cart_ts THEN 1 ELSE 0 END) AS purchased_after_cart
FROM steps;

For repeated funnels, this simple first-step approach may not be enough.

10. Find purchases within 24 hours after a product view.
SELECT DISTINCT v.user_id
FROM events v
JOIN events p
	ON p.user_id = v.user_id
	AND p.event_name = 'purchase'
	AND p.event_ts > v.event_ts
	AND p.event_ts <= v.event_ts + INTERVAL '24' HOUR
WHERE v.event_name = 'view_product';

At scale, prefilter both sides and watch for many-to-many event joins.

11. Sessionize events with a 30-minute inactivity gap.
WITH ordered AS (
	SELECT
		user_id,
		event_id,
		event_ts,
		LAG(event_ts) OVER (
			PARTITION BY user_id
			ORDER BY event_ts, event_id
		) AS prev_event_ts
	FROM events
),
marked AS (
	SELECT
		*,
		CASE
			WHEN prev_event_ts IS NULL THEN 1
			WHEN event_ts > prev_event_ts + INTERVAL '30' MINUTE THEN 1
			ELSE 0
		END AS new_session
	FROM ordered
)
SELECT
	user_id,
	SUM(new_session) OVER (
		PARTITION BY user_id
		ORDER BY event_ts, event_id
		ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
	) AS session_number,
	event_id,
	event_ts
FROM marked;
12. Find users with a 5-day active streak.
WITH active_days AS (
	SELECT DISTINCT user_id, day_number
	FROM daily_user_activity
),
numbered AS (
	SELECT
		user_id,
		day_number,
		day_number - ROW_NUMBER() OVER (
			PARTITION BY user_id
			ORDER BY day_number
		) AS island_key
	FROM active_days
),
streaks AS (
	SELECT user_id, COUNT(*) AS streak_days
	FROM numbered
	GROUP BY user_id, island_key
)
SELECT DISTINCT user_id
FROM streaks
WHERE streak_days >= 5;
13. Find gaps longer than 14 days between orders.
WITH ordered AS (
	SELECT
		customer_id,
		order_id,
		order_ts,
		LAG(order_ts) OVER (
			PARTITION BY customer_id
			ORDER BY order_ts, order_id
		) AS prev_order_ts
	FROM orders
	WHERE status = 'paid'
)
SELECT customer_id, order_id, prev_order_ts, order_ts
FROM ordered
WHERE order_ts > prev_order_ts + INTERVAL '14' DAY;
14. Detect overlapping customer plan intervals.
SELECT
	a.customer_id,
	a.plan_id AS plan_a,
	b.plan_id AS plan_b
FROM customer_plan_intervals a
JOIN customer_plan_intervals b
	ON b.customer_id = a.customer_id
	AND b.plan_id > a.plan_id
	AND a.valid_from < b.valid_to
	AND b.valid_from < a.valid_to;

For half-open intervals, overlap is a.start < b.end AND b.start < a.end.

15. Join orders to the customer plan active at order time.
SELECT o.order_id, o.customer_id, p.plan_name
FROM orders o
JOIN customer_plan_intervals p
	ON p.customer_id = o.customer_id
	AND o.order_ts >= p.valid_from
	AND o.order_ts < p.valid_to;

This is an interval join.

16. Find customers with no activity in the 90 days after signup.
SELECT c.customer_id
FROM customers c
WHERE NOT EXISTS (
	SELECT 1
	FROM events e
	WHERE e.user_id = c.customer_id
		AND e.event_ts >= c.signup_ts
		AND e.event_ts < c.signup_ts + INTERVAL '90' DAY
);
17. Compute order frequency in the first 30 days after signup.
SELECT
	c.customer_id,
	COUNT(o.order_id) AS orders_30d
FROM customers c
LEFT JOIN orders o
	ON o.customer_id = c.customer_id
	AND o.order_ts >= c.signup_ts
	AND o.order_ts < c.signup_ts + INTERVAL '30' DAY
	AND o.status = 'paid'
GROUP BY c.customer_id;
18. Find first purchase after first product view.
WITH first_view AS (
	SELECT user_id, MIN(event_ts) AS first_view_ts
	FROM events
	WHERE event_name = 'view_product'
	GROUP BY user_id
),
purchases_after AS (
	SELECT
		v.user_id,
		MIN(e.event_ts) AS first_purchase_after_view_ts
	FROM first_view v
	JOIN events e
		ON e.user_id = v.user_id
		AND e.event_name = 'purchase'
		AND e.event_ts > v.first_view_ts
	GROUP BY v.user_id
)
SELECT *
FROM purchases_after;
19. Create a cohort table with zero retained users included.

Use a cohort spine crossed with age periods, then left join activity.

WITH cohort_sizes AS (
	SELECT cohort_month, COUNT(*) AS cohort_users
	FROM user_cohorts
	GROUP BY cohort_month
),
cohort_age_spine AS (
	SELECT c.cohort_month, a.age_month
	FROM cohort_sizes c
	CROSS JOIN age_spine a
),
retention AS (
	SELECT cohort_month, age_month, COUNT(DISTINCT user_id) AS retained_users
	FROM user_month_activity
	GROUP BY cohort_month, age_month
)
SELECT
	s.cohort_month,
	s.age_month,
	COALESCE(r.retained_users, 0) AS retained_users
FROM cohort_age_spine s
LEFT JOIN retention r
	ON r.cohort_month = s.cohort_month
	AND r.age_month = s.age_month;
20. Find late-arriving events where ingest date is more than three days after event date.
SELECT event_id, event_ts, ingest_ts
FROM raw_events
WHERE ingest_ts >= event_ts + INTERVAL '3' DAY;

Timestamp difference syntax can vary. Use this as a semantic sketch.

21. Compute paid revenue by event date and processing date.
SELECT
	order_date,
	processing_date,
	SUM(total_amount) AS paid_revenue
FROM orders
WHERE status = 'paid'
GROUP BY order_date, processing_date;

This helps separate business event timing from pipeline arrival timing.

22. Find users whose first purchase happened before signup.
WITH first_purchase AS (
	SELECT customer_id, MIN(order_ts) AS first_purchase_ts
	FROM orders
	WHERE status = 'paid'
	GROUP BY customer_id
)
SELECT c.customer_id, c.signup_ts, f.first_purchase_ts
FROM customers c
JOIN first_purchase f
	ON f.customer_id = c.customer_id
WHERE f.first_purchase_ts < c.signup_ts;

This is usually a data quality or identity stitching issue.

23. Compute cancellation within 7 days of paid order.
SELECT
	o.order_id,
	CASE
		WHEN EXISTS (
			SELECT 1
			FROM order_events e
			WHERE e.order_id = o.order_id
				AND e.event_name = 'cancelled'
				AND e.event_ts > o.order_ts
				AND e.event_ts <= o.order_ts + INTERVAL '7' DAY
		)
		THEN 1 ELSE 0
	END AS cancelled_7d
FROM orders o
WHERE o.status = 'paid';
24. Find the first event of each session from a sessionized events table.
WITH ranked AS (
	SELECT
		*,
		ROW_NUMBER() OVER (
			PARTITION BY user_id, session_number
			ORDER BY event_ts, event_id
		) AS rn
	FROM sessionized_events
)
SELECT user_id, session_number, event_id, event_ts
FROM ranked
WHERE rn = 1;
25. Explain event time vs processing time in one sentence.

Event time is when the business action happened; processing time is when the data system observed or processed it. Business metrics usually use event time, while pipeline monitoring often uses processing time.